From: <dc...@us...> - 2012-10-30 12:41:16

Revision: 3859
http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3859&view=rev
Author: dcherix
Date: 2012-10-30 12:41:05 +0000 (Tue, 30 Oct 2012)

Log Message:
-----------
bug fixes

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/kb/sparql/simple/ABoxQueryGenerator.java
    trunk/components-core/src/main/java/org/dllearner/kb/sparql/simple/SparqlSimpleExtractor.java

Modified: trunk/components-core/src/main/java/org/dllearner/kb/sparql/simple/ABoxQueryGenerator.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/kb/sparql/simple/ABoxQueryGenerator.java	2012-10-18 08:19:13 UTC (rev 3858)
+++ trunk/components-core/src/main/java/org/dllearner/kb/sparql/simple/ABoxQueryGenerator.java	2012-10-30 12:41:05 UTC (rev 3859)
@@ -19,12 +19,14 @@
     public String createQuery(Set<String> individuals, String aboxfilter) {
         Monitor monABoxQueryGeneration = MonitorFactory.getTimeMonitor("ABox query generator").start();
         StringBuilder builder = new StringBuilder();
+        builder.append("PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n");
         builder.append("CONSTRUCT {?s ?p ?o } ");
-        builder.append("{ ?s ?p ?o . ");
+        builder.append("{ ?s ?p ?o . " );
         builder.append(makeInFilter("?s", individuals));
         if (aboxfilter != null) {
             builder.append(aboxfilter);
         }
+        builder.append("FILTER (! (?p=rdf:type))");
         builder.append("}");
         monABoxQueryGeneration.stop();
         return builder.toString();

Modified: trunk/components-core/src/main/java/org/dllearner/kb/sparql/simple/SparqlSimpleExtractor.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/kb/sparql/simple/SparqlSimpleExtractor.java	2012-10-18 08:19:13 UTC (rev 3858)
+++ trunk/components-core/src/main/java/org/dllearner/kb/sparql/simple/SparqlSimpleExtractor.java	2012-10-30 12:41:05 UTC (rev 3859)
@@ -158,11 +158,11 @@
         ABoxQueryGenerator aGenerator = new ABoxQueryGenerator();
         for (int i = 0; i < recursionDepth; i++) {
             if (instancesSet.isEmpty()) {
-                log.warn("no new instances found more recursions (recursion " + i + ") " + instancesSet.size() + " new instances");
+                log.warn("no new instances found more recursions (recursion {} ) {} new instances", i,instancesSet.size());
             }
-            log.info("processing (recursion " + i + ") " + instancesSet.size() + " new instances");
+            log.info("processing (recursion {} ) {} new instances",i,instancesSet.size());
             queryString = aGenerator.createQuery(instancesSet, aboxfilter);
             // System.out.println(queryString);
             log.debug("SPARQL: {}", queryString);
@@ -185,7 +185,8 @@
         }

-
+        log.info("recursion depth: {} reached, {} new instances",recursionDepth,instancesSet.size());
+
         //queryString = aGenerator.createLastQuery(instances, model, filters);
         //log.debug("SPARQL: {}", queryString);
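
The effect of the ABoxQueryGenerator change is easiest to see in the SPARQL string it now produces: the CONSTRUCT query declares the rdf: prefix and filters out rdf:type triples, so class assertions are no longer fetched together with the plain ABox triples. Below is a minimal, self-contained sketch of that query shape; it is not the DL-Learner class itself, and since makeInFilter() is not shown in the diff, its IN-filter rendering here is an assumption.

import java.util.LinkedHashSet;
import java.util.Set;
import java.util.StringJoiner;

// Sketch of the query produced by ABoxQueryGenerator.createQuery() after r3859.
public class ABoxQuerySketch {

    // Assumed shape of makeInFilter(): restrict ?s to the given individuals.
    static String makeInFilter(String var, Set<String> individuals) {
        StringJoiner uris = new StringJoiner(", ");
        for (String uri : individuals) {
            uris.add("<" + uri + ">");
        }
        return "FILTER (" + var + " IN (" + uris + ")) ";
    }

    static String createQuery(Set<String> individuals, String aboxfilter) {
        StringBuilder builder = new StringBuilder();
        // new in r3859: prefix declaration needed by the type filter below
        builder.append("PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n");
        builder.append("CONSTRUCT {?s ?p ?o } ");
        builder.append("{ ?s ?p ?o . ");
        builder.append(makeInFilter("?s", individuals));
        if (aboxfilter != null) {
            builder.append(aboxfilter);
        }
        // new in r3859: exclude class assertions from the ABox extraction
        builder.append("FILTER (! (?p=rdf:type))");
        builder.append("}");
        return builder.toString();
    }

    public static void main(String[] args) {
        Set<String> individuals = new LinkedHashSet<String>();
        individuals.add("http://dbpedia.org/resource/Leipzig");
        System.out.println(createQuery(individuals, null));
    }
}

For the single individual above, this prints a CONSTRUCT query whose WHERE clause keeps every triple of that individual except its rdf:type statements. The SparqlSimpleExtractor hunk of the same revision only switches string-concatenated log calls to SLF4J-style parameterized logging and reports when the recursion depth is reached.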
From: <lor...@us...> - 2012-10-18 08:19:23

Revision: 3858
http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3858&view=rev
Author: lorenz_b
Date: 2012-10-18 08:19:13 +0000 (Thu, 18 Oct 2012)

Log Message:
-----------
Added workaround for ELK reasoner and type checking. Has to be removed because this won't work for complex class expressions.

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/reasoning/OWLAPIReasoner.java

Modified: trunk/components-core/src/main/java/org/dllearner/reasoning/OWLAPIReasoner.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/reasoning/OWLAPIReasoner.java	2012-10-17 21:35:33 UTC (rev 3857)
+++ trunk/components-core/src/main/java/org/dllearner/reasoning/OWLAPIReasoner.java	2012-10-18 08:19:13 UTC (rev 3858)
@@ -636,12 +636,20 @@
     public boolean hasTypeImpl(Description concept, Individual individual) {
         boolean test = false;
         OWLClassExpression d = OWLAPIDescriptionConvertVisitor.getOWLClassExpression(concept);
-        OWLIndividual i = factory.getOWLNamedIndividual(IRI.create(individual.getName()));
-        try {
-            test = reasoner.isEntailed(factory.getOWLClassAssertionAxiom(d, i));
-        } catch (Exception e) {
-            test = true;
-        }
+        OWLNamedIndividual i = factory.getOWLNamedIndividual(IRI.create(individual.getName()));
+//        try {
+//            test = reasoner.isEntailed(factory.getOWLClassAssertionAxiom(d, i));
+//        } catch (Exception e) {
+//            test = true;
+//        }
+        if(d.isAnonymous()){
+            throw new UnsupportedOperationException("Can not do type checking for complex class expressions.");
+        }
+        for(OWLClass type : reasoner.getTypes(i, false).getFlattened()){
+            if(type.equals(d.asOWLClass())){
+                return true;
+            }
+        }
         return test;
     }
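
The workaround replaces the entailment check with a scan over the individual's inferred types, which is the instance-check route ELK does support; it necessarily fails for anonymous (complex) class expressions, which is why the log message flags it as temporary. A stand-alone sketch of the same pattern against the plain OWL API follows; the class and method names here are illustrative, not DL-Learner's.

import org.semanticweb.owlapi.model.OWLClass;
import org.semanticweb.owlapi.model.OWLClassExpression;
import org.semanticweb.owlapi.model.OWLNamedIndividual;
import org.semanticweb.owlapi.reasoner.OWLReasoner;

public class ElkInstanceCheck {

    /** Instance check via getTypes(), usable with reasoners lacking isEntailed(). */
    static boolean hasType(OWLReasoner reasoner, OWLClassExpression ce, OWLNamedIndividual ind) {
        if (ce.isAnonymous()) {
            // getTypes() only returns named classes, so complex expressions are out.
            throw new UnsupportedOperationException(
                    "Can not do type checking for complex class expressions.");
        }
        // 'false' asks for all (direct and indirect) inferred types of ind
        for (OWLClass type : reasoner.getTypes(ind, false).getFlattened()) {
            if (type.equals(ce.asOWLClass())) {
                return true;
            }
        }
        return false;
    }
}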
From: <ed...@us...> - 2012-10-17 21:35:42
|
Revision: 3857 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3857&view=rev Author: edka Date: 2012-10-17 21:35:33 +0000 (Wed, 17 Oct 2012) Log Message: ----------- OWLAPIReasoner with catch-try-blocks for not implemented methods Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/reasoning/OWLAPIReasoner.java Modified: trunk/components-core/src/main/java/org/dllearner/reasoning/OWLAPIReasoner.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/reasoning/OWLAPIReasoner.java 2012-10-16 02:29:49 UTC (rev 3856) +++ trunk/components-core/src/main/java/org/dllearner/reasoning/OWLAPIReasoner.java 2012-10-17 21:35:33 UTC (rev 3857) @@ -39,7 +39,6 @@ import org.apache.log4j.Level; import org.apache.log4j.Logger; -import org.dllearner.core.AbstractKnowledgeSource; import org.dllearner.core.AbstractReasonerComponent; import org.dllearner.core.ComponentAnn; import org.dllearner.core.ComponentInitException; @@ -60,11 +59,7 @@ import org.dllearner.core.owl.Thing; import org.dllearner.core.owl.TypedConstant; import org.dllearner.core.owl.UntypedConstant; -import org.dllearner.kb.OWLAPIOntology; -import org.dllearner.kb.OWLFile; import org.dllearner.kb.OWLOntologyKnowledgeSource; -import org.dllearner.kb.sparql.SparqlKnowledgeSource; -import org.dllearner.kb.sparql.simple.SparqlSimpleExtractor; import org.dllearner.utilities.owl.ConceptComparator; import org.dllearner.utilities.owl.DLLearnerDescriptionConvertVisitor; import org.dllearner.utilities.owl.OWLAPIAxiomConvertVisitor; @@ -104,9 +99,7 @@ import org.semanticweb.owlapi.model.UnknownOWLOntologyException; import org.semanticweb.owlapi.owllink.OWLlinkHTTPXMLReasonerFactory; import org.semanticweb.owlapi.owllink.OWLlinkReasonerConfiguration; -import org.semanticweb.owlapi.reasoner.FreshEntitiesException; import org.semanticweb.owlapi.reasoner.FreshEntityPolicy; -import org.semanticweb.owlapi.reasoner.InconsistentOntologyException; import org.semanticweb.owlapi.reasoner.IndividualNodeSetPolicy; import org.semanticweb.owlapi.reasoner.InferenceType; import org.semanticweb.owlapi.reasoner.Node; @@ -114,10 +107,8 @@ import org.semanticweb.owlapi.reasoner.NullReasonerProgressMonitor; import org.semanticweb.owlapi.reasoner.OWLReasoner; import org.semanticweb.owlapi.reasoner.OWLReasonerConfiguration; -import org.semanticweb.owlapi.reasoner.ReasonerInterruptedException; import org.semanticweb.owlapi.reasoner.ReasonerProgressMonitor; import org.semanticweb.owlapi.reasoner.SimpleConfiguration; -import org.semanticweb.owlapi.reasoner.TimeOutException; import org.semanticweb.owlapi.util.SimpleIRIMapper; import org.semanticweb.owlapi.vocab.PrefixOWLOntologyFormat; import org.springframework.beans.propertyeditors.StringTrimmerEditor; @@ -130,988 +121,1093 @@ import de.tudresden.inf.lat.cel.owlapi.CelReasoner; /** - * Mapping to OWL API reasoner interface. The OWL API currently - * supports three reasoners: FaCT++, HermiT and Pellet. FaCT++ is connected - * using JNI and native libraries, while HermiT and Pellet are pure Java - * libraries. - * + * Mapping to OWL API reasoner interface. The OWL API currently supports three + * reasoners: FaCT++, HermiT and Pellet. FaCT++ is connected using JNI and + * native libraries, while HermiT and Pellet are pure Java libraries. 
+ * * @author Jens Lehmann */ @ComponentAnn(name = "OWL API Reasoner", shortName = "oar", version = 0.8) public class OWLAPIReasoner extends AbstractReasonerComponent { -// private static Logger logger = Logger -// .getLogger(OWLAPIReasoner.class); + // private static Logger logger = Logger + // .getLogger(OWLAPIReasoner.class); - //private String reasonerType = "pellet"; - private OWLReasoner reasoner; - private OWLOntologyManager manager; + // private String reasonerType = "pellet"; + private OWLReasoner reasoner; + private OWLOntologyManager manager; - private OWLOntology ontology; - // the data factory is used to generate OWL API objects - private OWLDataFactory factory; - // static factory -// private static OWLDataFactory staticFactory = OWLManager.createOWLOntologyManager().getOWLDataFactory(); + private OWLOntology ontology; + // the data factory is used to generate OWL API objects + private OWLDataFactory factory; + // static factory + // private static OWLDataFactory staticFactory = + // OWLManager.createOWLOntologyManager().getOWLDataFactory(); - private ConceptComparator conceptComparator = new ConceptComparator(); - private RoleComparator roleComparator = new RoleComparator(); -// private ClassHierarchy subsumptionHierarchy; -// private ObjectPropertyHierarchy roleHierarchy; -// private DatatypePropertyHierarchy datatypePropertyHierarchy; -// private Set<Description> allowedConceptsInSubsumptionHierarchy; + private ConceptComparator conceptComparator = new ConceptComparator(); + private RoleComparator roleComparator = new RoleComparator(); + // private ClassHierarchy subsumptionHierarchy; + // private ObjectPropertyHierarchy roleHierarchy; + // private DatatypePropertyHierarchy datatypePropertyHierarchy; + // private Set<Description> allowedConceptsInSubsumptionHierarchy; - // primitives - Set<NamedClass> atomicConcepts = new TreeSet<NamedClass>(conceptComparator); - Set<ObjectProperty> atomicRoles = new TreeSet<ObjectProperty>(roleComparator); - SortedSet<DatatypeProperty> datatypeProperties = new TreeSet<DatatypeProperty>(); - SortedSet<DatatypeProperty> booleanDatatypeProperties = new TreeSet<DatatypeProperty>(); - SortedSet<DatatypeProperty> doubleDatatypeProperties = new TreeSet<DatatypeProperty>(); - SortedSet<DatatypeProperty> intDatatypeProperties = new TreeSet<DatatypeProperty>(); - SortedSet<DatatypeProperty> stringDatatypeProperties = new TreeSet<DatatypeProperty>(); - SortedSet<Individual> individuals = new TreeSet<Individual>(); + // primitives + Set<NamedClass> atomicConcepts = new TreeSet<NamedClass>(conceptComparator); + Set<ObjectProperty> atomicRoles = new TreeSet<ObjectProperty>(roleComparator); + SortedSet<DatatypeProperty> datatypeProperties = new TreeSet<DatatypeProperty>(); + SortedSet<DatatypeProperty> booleanDatatypeProperties = new TreeSet<DatatypeProperty>(); + SortedSet<DatatypeProperty> doubleDatatypeProperties = new TreeSet<DatatypeProperty>(); + SortedSet<DatatypeProperty> intDatatypeProperties = new TreeSet<DatatypeProperty>(); + SortedSet<DatatypeProperty> stringDatatypeProperties = new TreeSet<DatatypeProperty>(); + SortedSet<Individual> individuals = new TreeSet<Individual>(); - // namespaces - private Map<String, String> prefixes = new TreeMap<String, String>(); - private String baseURI; + // namespaces + private Map<String, String> prefixes = new TreeMap<String, String>(); + private String baseURI; - // references to OWL API ontologies - private List<OWLOntology> owlAPIOntologies = new LinkedList<OWLOntology>(); - @ConfigOption(name = 
"reasonerType", description = "The name of the OWL APIReasoner to use {\"fact\", \"hermit\", \"owllink\", \"pellet\", \"elk\", \"cel\"}", defaultValue = "pellet", required = false, propertyEditorClass = StringTrimmerEditor.class) - private String reasonerTypeString = "pellet"; - @ConfigOption(name = "owlLinkURL", description = "The URL to the owl server", defaultValue = "", required = false, propertyEditorClass = StringTrimmerEditor.class) - private String owlLinkURL; + // references to OWL API ontologies + private List<OWLOntology> owlAPIOntologies = new LinkedList<OWLOntology>(); + @ConfigOption(name = "reasonerType", description = "The name of the OWL APIReasoner to use {\"fact\", \"hermit\", \"owllink\", \"pellet\", \"elk\", \"cel\"}", defaultValue = "pellet", required = false, propertyEditorClass = StringTrimmerEditor.class) + private String reasonerTypeString = "pellet"; + @ConfigOption(name = "owlLinkURL", description = "The URL to the owl server", defaultValue = "", required = false, propertyEditorClass = StringTrimmerEditor.class) + private String owlLinkURL; + public OWLAPIReasoner() { - public OWLAPIReasoner() { + } - } + public OWLAPIReasoner(Set<KnowledgeSource> sources) { + super(sources); + } - public OWLAPIReasoner(Set<KnowledgeSource> sources) { - super(sources); - } + public static String getName() { + return "OWL API reasoner"; + } - public static String getName() { - return "OWL API reasoner"; - } + @Override + public void init() throws ComponentInitException { + // reset variables (otherwise subsequent initialisation with + // different knowledge sources will merge both) + atomicConcepts = new TreeSet<NamedClass>(conceptComparator); + atomicRoles = new TreeSet<ObjectProperty>(roleComparator); + datatypeProperties = new TreeSet<DatatypeProperty>(); + booleanDatatypeProperties = new TreeSet<DatatypeProperty>(); + doubleDatatypeProperties = new TreeSet<DatatypeProperty>(); + intDatatypeProperties = new TreeSet<DatatypeProperty>(); + stringDatatypeProperties = new TreeSet<DatatypeProperty>(); + individuals = new TreeSet<Individual>(); - @Override - public void init() throws ComponentInitException { - // reset variables (otherwise subsequent initialisation with - // different knowledge sources will merge both) - atomicConcepts = new TreeSet<NamedClass>(conceptComparator); - atomicRoles = new TreeSet<ObjectProperty>(roleComparator); - datatypeProperties = new TreeSet<DatatypeProperty>(); - booleanDatatypeProperties = new TreeSet<DatatypeProperty>(); - doubleDatatypeProperties = new TreeSet<DatatypeProperty>(); - intDatatypeProperties = new TreeSet<DatatypeProperty>(); - stringDatatypeProperties = new TreeSet<DatatypeProperty>(); - individuals = new TreeSet<Individual>(); + // create OWL API ontology manager - make sure we use a new data factory + // so that we don't default to the static one which can cause problems + // in a multi threaded environment. + manager = OWLManager.createOWLOntologyManager(new OWLDataFactoryImpl()); - // create OWL API ontology manager - make sure we use a new data factory so that we don't default to the static one which can cause problems in a multi threaded environment. 
- manager = OWLManager.createOWLOntologyManager(new OWLDataFactoryImpl()); + // it is a bit cumbersome to obtain all classes, because there + // are no reasoner queries to obtain them => hence we query them + // for each ontology and add them to a set; a comparator avoids + // duplicates by checking URIs + Comparator<OWLNamedObject> namedObjectComparator = new Comparator<OWLNamedObject>() { + public int compare(OWLNamedObject o1, OWLNamedObject o2) { + return o1.getIRI().compareTo(o2.getIRI()); + } + }; + Set<OWLClass> classes = new TreeSet<OWLClass>(namedObjectComparator); + Set<OWLObjectProperty> owlObjectProperties = new TreeSet<OWLObjectProperty>( + namedObjectComparator); + Set<OWLDataProperty> owlDatatypeProperties = new TreeSet<OWLDataProperty>( + namedObjectComparator); + Set<OWLNamedIndividual> owlIndividuals = new TreeSet<OWLNamedIndividual>( + namedObjectComparator); - // it is a bit cumbersome to obtain all classes, because there - // are no reasoner queries to obtain them => hence we query them - // for each ontology and add them to a set; a comparator avoids - // duplicates by checking URIs - Comparator<OWLNamedObject> namedObjectComparator = new Comparator<OWLNamedObject>() { - public int compare(OWLNamedObject o1, OWLNamedObject o2) { - return o1.getIRI().compareTo(o2.getIRI()); - } - }; - Set<OWLClass> classes = new TreeSet<OWLClass>(namedObjectComparator); - Set<OWLObjectProperty> owlObjectProperties = new TreeSet<OWLObjectProperty>(namedObjectComparator); - Set<OWLDataProperty> owlDatatypeProperties = new TreeSet<OWLDataProperty>(namedObjectComparator); - Set<OWLNamedIndividual> owlIndividuals = new TreeSet<OWLNamedIndividual>(namedObjectComparator); + Set<OWLOntology> allImports = new HashSet<OWLOntology>(); + prefixes = new TreeMap<String, String>(); - Set<OWLOntology> allImports = new HashSet<OWLOntology>(); - prefixes = new TreeMap<String, String>(); + Set<OWLImportsDeclaration> directImports = new HashSet<OWLImportsDeclaration>(); - Set<OWLImportsDeclaration> directImports = new HashSet<OWLImportsDeclaration>(); + for (KnowledgeSource source : sources) { - for (KnowledgeSource source : sources) { + if (source instanceof OWLOntologyKnowledgeSource) { + ontology = ((OWLOntologyKnowledgeSource) source).createOWLOntology(manager); + owlAPIOntologies.add(ontology); + } else { + // This reasoner requires an ontology to process + throw new ComponentInitException( + "OWL API Reasoner Requires an OWLKnowledgeSource. Received a KS of type: " + + source.getClass().getName()); + } - if (source instanceof OWLOntologyKnowledgeSource) { - ontology = ((OWLOntologyKnowledgeSource) source).createOWLOntology(manager); - owlAPIOntologies.add(ontology); - }else{ - //This reasoner requires an ontology to process - throw new ComponentInitException("OWL API Reasoner Requires an OWLKnowledgeSource. 
Received a KS of type: " + source.getClass().getName()); - } + directImports.addAll(ontology.getImportsDeclarations()); - directImports.addAll(ontology.getImportsDeclarations()); + try { + // imports includes the ontology itself + // FIXME this line throws the strange error + Set<OWLOntology> imports = manager.getImportsClosure(ontology); + allImports.addAll(imports); - try { - // imports includes the ontology itself - //FIXME this line throws the strange error - Set<OWLOntology> imports = manager.getImportsClosure(ontology); - allImports.addAll(imports); + // System.out.println(imports); + for (OWLOntology ont : imports) { + classes.addAll(ont.getClassesInSignature()); + owlObjectProperties.addAll(ont.getObjectPropertiesInSignature()); + owlDatatypeProperties.addAll(ont.getDataPropertiesInSignature()); + owlIndividuals.addAll(ont.getIndividualsInSignature()); + } -// System.out.println(imports); - for (OWLOntology ont : imports) { - classes.addAll(ont.getClassesInSignature()); - owlObjectProperties.addAll(ont.getObjectPropertiesInSignature()); - owlDatatypeProperties.addAll(ont.getDataPropertiesInSignature()); - owlIndividuals.addAll(ont.getIndividualsInSignature()); - } + } catch (UnknownOWLOntologyException uooe) { + logger.error("UnknownOWLOntologyException occured, imports were not loaded! This is a bug, which has not been fixed yet."); + } - } catch (UnknownOWLOntologyException uooe) { - logger.error("UnknownOWLOntologyException occured, imports were not loaded! This is a bug, which has not been fixed yet."); - } + // if several knowledge sources are included, then we can only + // guarantee that the base URI is from one of those sources (there + // can't be more than one); but we will take care that all prefixes + // are + // correctly imported + OWLOntologyFormat format = manager.getOntologyFormat(ontology); + if (format instanceof PrefixOWLOntologyFormat) { + prefixes.putAll(((PrefixOWLOntologyFormat) format).getPrefixName2PrefixMap()); + baseURI = ((PrefixOWLOntologyFormat) format).getDefaultPrefix(); + prefixes.remove(""); + } - // if several knowledge sources are included, then we can only - // guarantee that the base URI is from one of those sources (there - // can't be more than one); but we will take care that all prefixes are - // correctly imported - OWLOntologyFormat format = manager.getOntologyFormat(ontology); - if (format instanceof PrefixOWLOntologyFormat) { - prefixes.putAll(((PrefixOWLOntologyFormat) format).getPrefixName2PrefixMap()); - baseURI = ((PrefixOWLOntologyFormat) format).getDefaultPrefix(); - prefixes.remove(""); - } + } - } + // Now merge all of the knowledge sources into one ontology instance. + try { + // The following line illustrates a problem with using different + // OWLOntologyManagers. This can manifest itself if we have multiple + // sources who were created with different manager instances. 
+ // ontology = + // OWLManager.createOWLOntologyManager().createOntology(IRI.create("http://dl-learner/all"), + // new HashSet<OWLOntology>(owlAPIOntologies)); + ontology = manager.createOntology(IRI.create("http://dl-learner/all"), + new HashSet<OWLOntology>(owlAPIOntologies)); + // we have to add all import declarations manually here, because + // this are no axioms + List<OWLOntologyChange> addImports = new ArrayList<OWLOntologyChange>(); + for (OWLImportsDeclaration i : directImports) { + addImports.add(new AddImport(ontology, i)); + } + manager.applyChanges(addImports); + } catch (OWLOntologyCreationException e1) { + e1.printStackTrace(); + } - //Now merge all of the knowledge sources into one ontology instance. - try { - //The following line illustrates a problem with using different OWLOntologyManagers. This can manifest itself if we have multiple sources who were created with different manager instances. - //ontology = OWLManager.createOWLOntologyManager().createOntology(IRI.create("http://dl-learner/all"), new HashSet<OWLOntology>(owlAPIOntologies)); - ontology = manager.createOntology(IRI.create("http://dl-learner/all"), new HashSet<OWLOntology>(owlAPIOntologies)); - //we have to add all import declarations manually here, because this are no axioms - List<OWLOntologyChange> addImports = new ArrayList<OWLOntologyChange>(); - for (OWLImportsDeclaration i : directImports) { - addImports.add(new AddImport(ontology, i)); - } - manager.applyChanges(addImports); - } catch (OWLOntologyCreationException e1) { - e1.printStackTrace(); - } + // configure reasoner + ReasonerProgressMonitor progressMonitor = new NullReasonerProgressMonitor(); + FreshEntityPolicy freshEntityPolicy = FreshEntityPolicy.ALLOW; + long timeOut = Integer.MAX_VALUE; + IndividualNodeSetPolicy individualNodeSetPolicy = IndividualNodeSetPolicy.BY_NAME; + OWLReasonerConfiguration conf = new SimpleConfiguration(progressMonitor, freshEntityPolicy, + timeOut, individualNodeSetPolicy); - //configure reasoner - ReasonerProgressMonitor progressMonitor = new NullReasonerProgressMonitor(); - FreshEntityPolicy freshEntityPolicy = FreshEntityPolicy.ALLOW; - long timeOut = Integer.MAX_VALUE; - IndividualNodeSetPolicy individualNodeSetPolicy = IndividualNodeSetPolicy.BY_NAME; - OWLReasonerConfiguration conf = new SimpleConfiguration(progressMonitor, freshEntityPolicy, timeOut, individualNodeSetPolicy); + // create actual reasoner + if (getReasonerTypeString().equals("fact")) { + try { + reasoner = new FaCTPlusPlusReasonerFactory().createNonBufferingReasoner(ontology, + conf); - // create actual reasoner - if (getReasonerTypeString().equals("fact")) { - try { - reasoner = new FaCTPlusPlusReasonerFactory().createNonBufferingReasoner(ontology, conf); - } catch (Exception e) { - throw new RuntimeException(e); - } - System.out.println("Using FaCT++."); - } else if (getReasonerTypeString().equals("hermit")) { - // instantiate HermiT reasoner - reasoner = new ReasonerFactory().createNonBufferingReasoner(ontology, conf); - } else if (getReasonerTypeString().equals("pellet")) { - // instantiate Pellet reasoner - reasoner = PelletReasonerFactory.getInstance().createNonBufferingReasoner(ontology, conf); - // change log level to WARN for Pellet, because otherwise log - // output will be very large - Logger pelletLogger = Logger.getLogger("org.mindswap.pellet"); - pelletLogger.setLevel(Level.WARN); - } else if (getReasonerTypeString().equals("elk")) { - // instantiate ELK reasoner - reasoner = new 
ElkReasonerFactory().createNonBufferingReasoner(ontology, conf); - } else if (getReasonerTypeString().equals("cel")) { - // instantiate CEL reasoner - reasoner = new CelReasoner(ontology, conf); - } else { - try { - OWLlinkHTTPXMLReasonerFactory factory = new OWLlinkHTTPXMLReasonerFactory(); - URL url = new URL(getOwlLinkURL());//Configure the server end-point - OWLlinkReasonerConfiguration config = new OWLlinkReasonerConfiguration(url); - reasoner = factory.createNonBufferingReasoner(ontology, config); - System.out.println(reasoner.getReasonerName()); - } catch (Exception e) { -// e.printStackTrace(); - throw new ComponentInitException(e); - } - } + } catch (Exception e) { + throw new RuntimeException(e); + } + System.out.println("Using FaCT++."); + } else if (getReasonerTypeString().equals("hermit")) { + // instantiate HermiT reasoner + reasoner = new ReasonerFactory().createNonBufferingReasoner(ontology, conf); + } else if (getReasonerTypeString().equals("pellet")) { + // instantiate Pellet reasoner + reasoner = PelletReasonerFactory.getInstance().createNonBufferingReasoner(ontology, + conf); + // change log level to WARN for Pellet, because otherwise log + // output will be very large + Logger pelletLogger = Logger.getLogger("org.mindswap.pellet"); + pelletLogger.setLevel(Level.WARN); + } else if (getReasonerTypeString().equals("elk")) { + // instantiate ELK reasoner + reasoner = new ElkReasonerFactory().createNonBufferingReasoner(ontology, conf); + // reasoner = new + // MyElkReasonerFactory().createNonBufferingReasoner(ontology, + // conf); + } else if (getReasonerTypeString().equals("cel")) { + // instantiate CEL reasoner + reasoner = new CelReasoner(ontology, conf); + } else { + try { + OWLlinkHTTPXMLReasonerFactory factory = new OWLlinkHTTPXMLReasonerFactory(); + URL url = new URL(getOwlLinkURL());// Configure the server + // end-point + OWLlinkReasonerConfiguration config = new OWLlinkReasonerConfiguration(url); + reasoner = factory.createNonBufferingReasoner(ontology, config); + System.out.println(reasoner.getReasonerName()); + } catch (Exception e) { + // e.printStackTrace(); + throw new ComponentInitException(e); + } + } - /* - Set<OWLOntology> importsClosure = manager.getImportsClosure(ontology); - System.out.println("imports closure : " + importsClosure); - try { - reasoner.loadOntologies(importsClosure); - } catch (OWLReasonerException e1) { - // TODO Auto-generated catch block - e1.printStackTrace(); - }*/ + /* + * Set<OWLOntology> importsClosure = + * manager.getImportsClosure(ontology); + * System.out.println("imports closure : " + importsClosure); try { + * reasoner.loadOntologies(importsClosure); } catch + * (OWLReasonerException e1) { // TODO Auto-generated catch block + * e1.printStackTrace(); } + */ -// System.out.println(classes); -// System.out.println(properties); -// System.out.println(individuals); + // System.out.println(classes); + // System.out.println(properties); + // System.out.println(individuals); - // compute class hierarchy and types of individuals - // (done here to speed up later reasoner calls) - boolean inconsistentOntology = !reasoner.isConsistent(); + // compute class hierarchy and types of individuals + // (done here to speed up later reasoner calls) + boolean inconsistentOntology = !reasoner.isConsistent(); - if (!inconsistentOntology) { - reasoner.precomputeInferences(InferenceType.CLASS_HIERARCHY, InferenceType.CLASS_ASSERTIONS); - } else { - throw new ComponentInitException("Inconsistent ontologies."); - } + if (!inconsistentOntology) { + 
reasoner.precomputeInferences(InferenceType.CLASS_HIERARCHY, + InferenceType.CLASS_ASSERTIONS); + } else { + throw new ComponentInitException("Inconsistent ontologies."); + } - factory = manager.getOWLDataFactory(); + factory = manager.getOWLDataFactory(); -// try { -// if(reasoner.isDefined(factory.getOWLIndividual(URI.create("http://example.com/father#female")))) -// System.out.println("DEFINED."); -// else -// System.out.println("NOT DEFINED."); -// } catch (OWLReasonerException e) { -// // TODO Auto-generated catch block -// e.printStackTrace(); -// } + // try { + // if(reasoner.isDefined(factory.getOWLIndividual(URI.create("http://example.com/father#female")))) + // System.out.println("DEFINED."); + // else + // System.out.println("NOT DEFINED."); + // } catch (OWLReasonerException e) { + // // TODO Auto-generated catch block + // e.printStackTrace(); + // } - // read in primitives - for (OWLClass owlClass : classes) - atomicConcepts.add(new NamedClass(owlClass.toStringID())); - for (OWLObjectProperty owlProperty : owlObjectProperties) - atomicRoles.add(new ObjectProperty(owlProperty.toStringID())); - for (OWLDataProperty owlProperty : owlDatatypeProperties) { - DatatypeProperty dtp = new DatatypeProperty(owlProperty.toStringID()); - Set<OWLDataRange> ranges = owlProperty.getRanges(allImports); - Iterator<OWLDataRange> it = ranges.iterator(); - if (it.hasNext()) { - OWLDataRange range = it.next(); - if (range.isDatatype()) { - URI uri = ((OWLDatatype) range).getIRI().toURI(); - if (uri.equals(OWL2Datatype.BOOLEAN.getURI())) booleanDatatypeProperties.add(dtp); - else if (uri.equals(OWL2Datatype.DOUBLE.getURI())) doubleDatatypeProperties.add(dtp); - else if (uri.equals(OWL2Datatype.INT.getURI())) intDatatypeProperties.add(dtp); - else if (uri.equals(OWL2Datatype.STRING.getURI())) stringDatatypeProperties.add(dtp); - } - } else { - stringDatatypeProperties.add(dtp); - } - datatypeProperties.add(dtp); - } - for (OWLNamedIndividual owlIndividual : owlIndividuals) { - individuals.add(new Individual(owlIndividual.toStringID())); - } + // read in primitives + for (OWLClass owlClass : classes) + atomicConcepts.add(new NamedClass(owlClass.toStringID())); + for (OWLObjectProperty owlProperty : owlObjectProperties) + atomicRoles.add(new ObjectProperty(owlProperty.toStringID())); + for (OWLDataProperty owlProperty : owlDatatypeProperties) { + DatatypeProperty dtp = new DatatypeProperty(owlProperty.toStringID()); + Set<OWLDataRange> ranges = owlProperty.getRanges(allImports); + Iterator<OWLDataRange> it = ranges.iterator(); + if (it.hasNext()) { + OWLDataRange range = it.next(); + if (range.isDatatype()) { + URI uri = ((OWLDatatype) range).getIRI().toURI(); + if (uri.equals(OWL2Datatype.BOOLEAN.getURI())) + booleanDatatypeProperties.add(dtp); + else if (uri.equals(OWL2Datatype.DOUBLE.getURI())) + doubleDatatypeProperties.add(dtp); + else if (uri.equals(OWL2Datatype.INT.getURI())) + intDatatypeProperties.add(dtp); + else if (uri.equals(OWL2Datatype.STRING.getURI())) + stringDatatypeProperties.add(dtp); + } + } else { + stringDatatypeProperties.add(dtp); + } + datatypeProperties.add(dtp); + } + for (OWLNamedIndividual owlIndividual : owlIndividuals) { + individuals.add(new Individual(owlIndividual.toStringID())); + } - // remove top and bottom properties (for backwards compatibility) -// atomicRoles.remove(new ObjectProperty("http://www.w3.org/2002/07/owl#bottomObjectProperty")); -// atomicRoles.remove(new ObjectProperty("http://www.w3.org/2002/07/owl#topObjectProperty")); - } + // remove top and 
bottom properties (for backwards compatibility) + // atomicRoles.remove(new + // ObjectProperty("http://www.w3.org/2002/07/owl#bottomObjectProperty")); + // atomicRoles.remove(new + // ObjectProperty("http://www.w3.org/2002/07/owl#topObjectProperty")); + } - /* (non-Javadoc) - * @see org.dllearner.core.Reasoner#getAtomicConcepts() - */ - public Set<NamedClass> getNamedClasses() { - return Collections.unmodifiableSet(atomicConcepts); - } + /* + * (non-Javadoc) + * + * @see org.dllearner.core.Reasoner#getAtomicConcepts() + */ + public Set<NamedClass> getNamedClasses() { + return Collections.unmodifiableSet(atomicConcepts); + } - /* (non-Javadoc) - * @see org.dllearner.core.Reasoner#getAtomicRoles() - */ - public Set<ObjectProperty> getObjectProperties() { - return Collections.unmodifiableSet(atomicRoles); - } + /* + * (non-Javadoc) + * + * @see org.dllearner.core.Reasoner#getAtomicRoles() + */ + public Set<ObjectProperty> getObjectProperties() { + return Collections.unmodifiableSet(atomicRoles); + } - @Override - public SortedSet<DatatypeProperty> getDatatypePropertiesImpl() { - return datatypeProperties; - } + @Override + public SortedSet<DatatypeProperty> getDatatypePropertiesImpl() { + return datatypeProperties; + } - /* (non-Javadoc) - * @see org.dllearner.core.Reasoner#getIndividuals() - */ - public SortedSet<Individual> getIndividuals() { - return individuals; - } + /* + * (non-Javadoc) + * + * @see org.dllearner.core.Reasoner#getIndividuals() + */ + public SortedSet<Individual> getIndividuals() { + return individuals; + } - /* (non-Javadoc) - * @see org.dllearner.core.Reasoner#getReasonerType() - */ - @Override - public ReasonerType getReasonerType() { - if (getReasonerTypeString().equals("fact")) { - return ReasonerType.OWLAPI_FACT; - } else if (getReasonerTypeString().equals("hermit")) { - return ReasonerType.OWLAPI_HERMIT; - } else { - return ReasonerType.OWLAPI_PELLET; - } - } + /* + * (non-Javadoc) + * + * @see org.dllearner.core.Reasoner#getReasonerType() + */ + @Override + public ReasonerType getReasonerType() { + if (getReasonerTypeString().equals("fact")) { + return ReasonerType.OWLAPI_FACT; + } else if (getReasonerTypeString().equals("hermit")) { + return ReasonerType.OWLAPI_HERMIT; + } else { + return ReasonerType.OWLAPI_PELLET; + } + } -// @Override -// public ObjectPropertyHierarchy prepareRoleHierarchy() { -// // code copied from DIG reasoner -// -// TreeMap<ObjectProperty, TreeSet<ObjectProperty>> roleHierarchyUp = new TreeMap<ObjectProperty, TreeSet<ObjectProperty>>( -// roleComparator); -// TreeMap<ObjectProperty, TreeSet<ObjectProperty>> roleHierarchyDown = new TreeMap<ObjectProperty, TreeSet<ObjectProperty>>( -// roleComparator); -// -// // refinement of atomic concepts -// for (ObjectProperty role : atomicRoles) { -// roleHierarchyDown.put(role, getMoreSpecialRolesImpl(role)); -// roleHierarchyUp.put(role, getMoreGeneralRolesImpl(role)); -// } -// -// roleHierarchy = new ObjectPropertyHierarchy(atomicRoles, roleHierarchyUp, -// roleHierarchyDown); -// return roleHierarchy; -// } + // @Override + // public ObjectPropertyHierarchy prepareRoleHierarchy() { + // // code copied from DIG reasoner + // + // TreeMap<ObjectProperty, TreeSet<ObjectProperty>> roleHierarchyUp = new + // TreeMap<ObjectProperty, TreeSet<ObjectProperty>>( + // roleComparator); + // TreeMap<ObjectProperty, TreeSet<ObjectProperty>> roleHierarchyDown = new + // TreeMap<ObjectProperty, TreeSet<ObjectProperty>>( + // roleComparator); + // + // // refinement of atomic concepts + // for (ObjectProperty 
role : atomicRoles) { + // roleHierarchyDown.put(role, getMoreSpecialRolesImpl(role)); + // roleHierarchyUp.put(role, getMoreGeneralRolesImpl(role)); + // } + // + // roleHierarchy = new ObjectPropertyHierarchy(atomicRoles, roleHierarchyUp, + // roleHierarchyDown); + // return roleHierarchy; + // } - /* (non-Javadoc) - * @see org.dllearner.core.Reasoner#prepareRoleHierarchy(java.util.Set) - */ -// public void prepareRoleHierarchy(Set<ObjectProperty> allowedRoles) { -// // code copied from DIG reasoner -// -// TreeMap<ObjectProperty, TreeSet<ObjectProperty>> roleHierarchyUp = new TreeMap<ObjectProperty, TreeSet<ObjectProperty>>( -// roleComparator); -// TreeMap<ObjectProperty, TreeSet<ObjectProperty>> roleHierarchyDown = new TreeMap<ObjectProperty, TreeSet<ObjectProperty>>( -// roleComparator); -// -// // refinement of atomic concepts -// for (ObjectProperty role : atomicRoles) { -// roleHierarchyDown.put(role, getMoreSpecialRolesImpl(role)); -// roleHierarchyUp.put(role, getMoreGeneralRolesImpl(role)); -// } -// -// roleHierarchy = new ObjectPropertyHierarchy(allowedRoles, roleHierarchyUp, -// roleHierarchyDown); -// } + /* + * (non-Javadoc) + * + * @see org.dllearner.core.Reasoner#prepareRoleHierarchy(java.util.Set) + */ + // public void prepareRoleHierarchy(Set<ObjectProperty> allowedRoles) { + // // code copied from DIG reasoner + // + // TreeMap<ObjectProperty, TreeSet<ObjectProperty>> roleHierarchyUp = new + // TreeMap<ObjectProperty, TreeSet<ObjectProperty>>( + // roleComparator); + // TreeMap<ObjectProperty, TreeSet<ObjectProperty>> roleHierarchyDown = new + // TreeMap<ObjectProperty, TreeSet<ObjectProperty>>( + // roleComparator); + // + // // refinement of atomic concepts + // for (ObjectProperty role : atomicRoles) { + // roleHierarchyDown.put(role, getMoreSpecialRolesImpl(role)); + // roleHierarchyUp.put(role, getMoreGeneralRolesImpl(role)); + // } + // + // roleHierarchy = new ObjectPropertyHierarchy(allowedRoles, + // roleHierarchyUp, + // roleHierarchyDown); + // } -// @Override -// public ObjectPropertyHierarchy getRoleHierarchy() { -// return roleHierarchy; -// } + // @Override + // public ObjectPropertyHierarchy getRoleHierarchy() { + // return roleHierarchy; + // } -// public void prepareDatatypePropertyHierarchyImpl(Set<DatatypeProperty> allowedRoles) { -// // code copied from DIG reasoner -// -// TreeMap<DatatypeProperty, TreeSet<DatatypeProperty>> datatypePropertyHierarchyUp = new TreeMap<DatatypeProperty, TreeSet<DatatypeProperty>>( -// roleComparator); -// TreeMap<DatatypeProperty, TreeSet<DatatypeProperty>> datatypePropertyHierarchyDown = new TreeMap<DatatypeProperty, TreeSet<DatatypeProperty>>( -// roleComparator); -// -// // refinement of atomic concepts -// for (DatatypeProperty role : datatypeProperties) { -// datatypePropertyHierarchyDown.put(role, getMoreSpecialDatatypePropertiesImpl(role)); -// datatypePropertyHierarchyUp.put(role, getMoreGeneralDatatypePropertiesImpl(role)); -// } -// -// datatypePropertyHierarchy = new DatatypePropertyHierarchy(allowedRoles, datatypePropertyHierarchyUp, -// datatypePropertyHierarchyDown); -// } + // public void prepareDatatypePropertyHierarchyImpl(Set<DatatypeProperty> + // allowedRoles) { + // // code copied from DIG reasoner + // + // TreeMap<DatatypeProperty, TreeSet<DatatypeProperty>> + // datatypePropertyHierarchyUp = new TreeMap<DatatypeProperty, + // TreeSet<DatatypeProperty>>( + // roleComparator); + // TreeMap<DatatypeProperty, TreeSet<DatatypeProperty>> + // datatypePropertyHierarchyDown = new 
TreeMap<DatatypeProperty, + // TreeSet<DatatypeProperty>>( + // roleComparator); + // + // // refinement of atomic concepts + // for (DatatypeProperty role : datatypeProperties) { + // datatypePropertyHierarchyDown.put(role, + // getMoreSpecialDatatypePropertiesImpl(role)); + // datatypePropertyHierarchyUp.put(role, + // getMoreGeneralDatatypePropertiesImpl(role)); + // } + // + // datatypePropertyHierarchy = new DatatypePropertyHierarchy(allowedRoles, + // datatypePropertyHierarchyUp, + // datatypePropertyHierarchyDown); + // } -// @Override -// public DatatypePropertyHierarchy getDatatypePropertyHierarchy() { -// return datatypePropertyHierarchy; -// } + // @Override + // public DatatypePropertyHierarchy getDatatypePropertyHierarchy() { + // return datatypePropertyHierarchy; + // } - @Override - public boolean isSuperClassOfImpl(Description superConcept, Description subConcept) { - return reasoner.isEntailed(factory.getOWLSubClassOfAxiom(OWLAPIDescriptionConvertVisitor.getOWLClassExpression(subConcept), OWLAPIDescriptionConvertVisitor.getOWLClassExpression(superConcept))); - } + @Override + public boolean isSuperClassOfImpl(Description superConcept, Description subConcept) { + return reasoner.isEntailed(factory.getOWLSubClassOfAxiom( + OWLAPIDescriptionConvertVisitor.getOWLClassExpression(subConcept), + OWLAPIDescriptionConvertVisitor.getOWLClassExpression(superConcept))); + } - @Override - protected boolean isEquivalentClassImpl(Description class1, Description class2) { - return reasoner.isEntailed(factory.getOWLEquivalentClassesAxiom(OWLAPIDescriptionConvertVisitor.getOWLClassExpression(class1), OWLAPIDescriptionConvertVisitor.getOWLClassExpression(class2))); - } + @Override + protected boolean isEquivalentClassImpl(Description class1, Description class2) { + return reasoner.isEntailed(factory.getOWLEquivalentClassesAxiom( + OWLAPIDescriptionConvertVisitor.getOWLClassExpression(class1), + OWLAPIDescriptionConvertVisitor.getOWLClassExpression(class2))); + } - @Override - protected TreeSet<Description> getSuperClassesImpl(Description concept) { - NodeSet<OWLClass> classes = null; + @Override + protected TreeSet<Description> getSuperClassesImpl(Description concept) { + NodeSet<OWLClass> classes = null; - classes = reasoner.getSuperClasses(OWLAPIDescriptionConvertVisitor.getOWLClassExpression(concept), true); + classes = reasoner.getSuperClasses( + OWLAPIDescriptionConvertVisitor.getOWLClassExpression(concept), true); - return getFirstClasses(classes); - } + return getFirstClasses(classes); + } - @Override - protected TreeSet<Description> getSubClassesImpl(Description concept) { - NodeSet<OWLClass> classes = null; + @Override + protected TreeSet<Description> getSubClassesImpl(Description concept) { + NodeSet<OWLClass> classes = null; - classes = reasoner.getSubClasses(OWLAPIDescriptionConvertVisitor.getOWLClassExpression(concept), true); + classes = reasoner.getSubClasses( + OWLAPIDescriptionConvertVisitor.getOWLClassExpression(concept), true); - return getFirstClasses(classes); - } + return getFirstClasses(classes); + } - @Override - protected TreeSet<ObjectProperty> getSuperPropertiesImpl(ObjectProperty role) { - NodeSet<OWLObjectPropertyExpression> properties = null; + @Override + protected TreeSet<ObjectProperty> getSuperPropertiesImpl(ObjectProperty role) { + NodeSet<OWLObjectPropertyExpression> properties = null; - properties = reasoner.getSuperObjectProperties(OWLAPIConverter.getOWLAPIObjectProperty(role), true); + try { + properties = reasoner.getSuperObjectProperties( + 
OWLAPIConverter.getOWLAPIObjectProperty(role), true); + return getFirstObjectProperties(properties); + } catch (Exception e) { + TreeSet<ObjectProperty> roles = new TreeSet<ObjectProperty>(roleComparator); + return roles; + } + } - return getFirstObjectProperties(properties); - } + @Override + protected TreeSet<ObjectProperty> getSubPropertiesImpl(ObjectProperty role) { + NodeSet<OWLObjectPropertyExpression> properties = null; - @Override - protected TreeSet<ObjectProperty> getSubPropertiesImpl(ObjectProperty role) { - NodeSet<OWLObjectPropertyExpression> properties = null; + try { + properties = reasoner.getSubObjectProperties( + OWLAPIConverter.getOWLAPIObjectProperty(role), true); + return getFirstObjectProperties(properties); + } catch (Exception e) { + TreeSet<ObjectProperty> roles = new TreeSet<ObjectProperty>(roleComparator); + return roles; + } - properties = reasoner.getSubObjectProperties(OWLAPIConverter.getOWLAPIObjectProperty(role), true); + } - return getFirstObjectProperties(properties); - } + @Override + protected TreeSet<DatatypeProperty> getSuperPropertiesImpl(DatatypeProperty role) { + NodeSet<OWLDataProperty> properties = null; - @Override - protected TreeSet<DatatypeProperty> getSuperPropertiesImpl(DatatypeProperty role) { - NodeSet<OWLDataProperty> properties = null; + properties = reasoner.getSuperDataProperties(OWLAPIConverter.getOWLAPIDataProperty(role), + true); - properties = reasoner.getSuperDataProperties(OWLAPIConverter.getOWLAPIDataProperty(role), true); + return getFirstDatatypeProperties(properties); + } - return getFirstDatatypeProperties(properties); - } + @Override + protected TreeSet<DatatypeProperty> getSubPropertiesImpl(DatatypeProperty role) { + NodeSet<OWLDataProperty> properties = null; - @Override - protected TreeSet<DatatypeProperty> getSubPropertiesImpl(DatatypeProperty role) { - NodeSet<OWLDataProperty> properties = null; + properties = reasoner.getSubDataProperties(OWLAPIConverter.getOWLAPIDataProperty(role), + true); - properties = reasoner.getSubDataProperties(OWLAPIConverter.getOWLAPIDataProperty(role), true); + return getFirstDatatypeProperties(properties); + } - return getFirstDatatypeProperties(properties); - } + @Override + public boolean hasTypeImpl(Description concept, Individual individual) { + boolean test = false; + OWLClassExpression d = OWLAPIDescriptionConvertVisitor.getOWLClassExpression(concept); + OWLIndividual i = factory.getOWLNamedIndividual(IRI.create(individual.getName())); + try { + test = reasoner.isEntailed(factory.getOWLClassAssertionAxiom(d, i)); + } catch (Exception e) { + test = true; + } + return test; + } - @Override - public boolean hasTypeImpl(Description concept, Individual individual) { - OWLClassExpression d = OWLAPIDescriptionConvertVisitor.getOWLClassExpression(concept); - OWLIndividual i = factory.getOWLNamedIndividual(IRI.create(individual.getName())); - return reasoner.isEntailed(factory.getOWLClassAssertionAxiom(d, i)); - } + @Override + public SortedSet<Individual> getIndividualsImpl(Description concept) { + // OWLDescription d = getOWLAPIDescription(concept); + OWLClassExpression d = OWLAPIDescriptionConvertVisitor.getOWLClassExpression(concept); + Set<OWLNamedIndividual> individuals = reasoner.getInstances(d, false).getFlattened(); + SortedSet<Individual> inds = new TreeSet<Individual>(); + for (OWLNamedIndividual ind : individuals) + // ugly code + if (ind != null) + inds.add(new Individual(ind.toStringID())); + return inds; + } - @Override - public SortedSet<Individual> 
getIndividualsImpl(Description concept) { -// OWLDescription d = getOWLAPIDescription(concept); - OWLClassExpression d = OWLAPIDescriptionConvertVisitor.getOWLClassExpression(concept); - Set<OWLNamedIndividual> individuals = reasoner.getInstances(d, false).getFlattened(); - SortedSet<Individual> inds = new TreeSet<Individual>(); - for (OWLNamedIndividual ind : individuals) - //ugly code - if (ind != null) inds.add(new Individual(ind.toStringID())); - return inds; - } + @Override + public Set<NamedClass> getTypesImpl(Individual individual) { + Set<Node<OWLClass>> result = null; - @Override - public Set<NamedClass> getTypesImpl(Individual individual) { - Set<Node<OWLClass>> result = null; + result = reasoner.getTypes(factory.getOWLNamedIndividual(IRI.create(individual.getName())), + false).getNodes(); - result = reasoner.getTypes(factory.getOWLNamedIndividual(IRI.create(individual.getName())), false).getNodes(); + return getFirstClassesNoTopBottom(result); + } - return getFirstClassesNoTopBottom(result); - } + @Override + public boolean isSatisfiableImpl() { + return reasoner.isSatisfiable(factory.getOWLThing()); + } - @Override - public boolean isSatisfiableImpl() { - return reasoner.isSatisfiable(factory.getOWLThing()); - } + @Override + public Description getDomainImpl(ObjectProperty objectProperty) { + OWLObjectProperty prop = OWLAPIConverter.getOWLAPIObjectProperty(objectProperty); - @Override - public Description getDomainImpl(ObjectProperty objectProperty) { - OWLObjectProperty prop = OWLAPIConverter.getOWLAPIObjectProperty(objectProperty); - - // Pellet returns a set of nodes of named classes, which are more - // general than the actual domain/range - NodeSet<OWLClass> set; + // Pellet returns a set of nodes of named classes, which are more + // general than the actual domain/range + NodeSet<OWLClass> set; try { set = reasoner.getObjectPropertyDomains(prop, false); return getDescriptionFromReturnedDomain(set); - } catch (InconsistentOntologyException e) { - e.printStackTrace(); - } catch (FreshEntitiesException e) { - e.printStackTrace(); - } catch (TimeOutException e) { - e.printStackTrace(); - } catch (ReasonerInterruptedException e) { - e.printStackTrace(); - } catch(de.tudresden.inf.lat.cel.owlapi.UnsupportedReasonerOperationInCelException e){ - e.printStackTrace(); + } catch (Exception e) { + return getDescriptionFromReturnedDomain(null); } - return Thing.instance; + } - } + @Override + public Description getDomainImpl(DatatypeProperty datatypeProperty) { + OWLDataProperty prop = OWLAPIConverter.getOWLAPIDataProperty(datatypeProperty); - @Override - public Description getDomainImpl(DatatypeProperty datatypeProperty) { - OWLDataProperty prop = OWLAPIConverter.getOWLAPIDataProperty(datatypeProperty); + NodeSet<OWLClass> set = reasoner.getDataPropertyDomains(prop, true); + return getDescriptionFromReturnedDomain(set); - NodeSet<OWLClass> set = reasoner.getDataPropertyDomains(prop, true); - return getDescriptionFromReturnedDomain(set); + } - } + @Override + public Description getRangeImpl(ObjectProperty objectProperty) { + OWLObjectProperty prop = OWLAPIConverter.getOWLAPIObjectProperty(objectProperty); - @Override - public Description getRangeImpl(ObjectProperty objectProperty) { - OWLObjectProperty prop = OWLAPIConverter.getOWLAPIObjectProperty(objectProperty); - - NodeSet<OWLClass> set; + NodeSet<OWLClass> set; try { set = reasoner.getObjectPropertyRanges(prop, true); - if (set.isEmpty()) return new Thing(); - OWLClass oc = set.iterator().next().getRepresentativeElement(); - if 
(oc.isOWLThing()) { - return Thing.instance; - } - return new NamedClass(oc.toStringID()); - } catch (InconsistentOntologyException e) { - e.printStackTrace(); - } catch (FreshEntitiesException e) { - e.printStackTrace(); - } catch (TimeOutException e) { - e.printStackTrace(); - } catch (ReasonerInterruptedException e) { - e.printStackTrace(); - } catch(de.tudresden.inf.lat.cel.owlapi.UnsupportedReasonerOperationInCelException e){ - e.printStackTrace(); + if (set.isEmpty()) + return new Thing(); + OWLClass oc = set.iterator().next().getRepresentativeElement(); + if (oc.isOWLThing()) { + return Thing.instance; + } + return new NamedClass(oc.toStringID()); + } catch (Exception e) { + return Thing.instance; } - return Thing.instance; + } - } + private Description getDescriptionFromReturnedDomain(NodeSet<OWLClass> set) { + if (set.isEmpty()) + return new Thing(); - private Description getDescriptionFromReturnedDomain(NodeSet<OWLClass> set) { - if (set.isEmpty()) return new Thing(); + Set<OWLClassExpression> union = new HashSet<OWLClassExpression>(); + Set<OWLClassExpression> domains = new HashSet<OWLClassExpression>(); - Set<OWLClassExpression> union = new HashSet<OWLClassExpression>(); - Set<OWLClassExpression> domains = new HashSet<OWLClassExpression>(); + for (Node<OWLClass> descs : set) { + for (OWLClassExpression desc : descs) { + union.add(desc); + } + } + for (OWLClassExpression desc : union) { + boolean isSuperClass = false; + for (Description d : getClassHierarchy().getSubClasses( + OWLAPIConverter.convertClass(desc.asOWLClass()))) { + if (union.contains(OWLAPIConverter.getOWLAPIDescription(d))) { + isSuperClass = true; + break; + } + } + if (!isSuperClass) { + domains.add(desc); + } + } - for (Node<OWLClass> descs : set) { - for (OWLClassExpression desc : descs) { - union.add(desc); - } - } - for (OWLClassExpression desc : union) { - boolean isSuperClass = false; - for (Description d : getClassHierarchy().getSubClasses(OWLAPIConverter.convertClass(desc.asOWLClass()))) { - if (union.contains(OWLAPIConverter.getOWLAPIDescription(d))) { - isSuperClass = true; - break; - } - } - if (!isSuperClass) { - domains.add(desc); - } - } + OWLClass oc = (OWLClass) domains.iterator().next(); + if (oc.isOWLThing()) { + return new Thing(); + } else { + return new NamedClass(oc.toStringID()); + } + } - OWLClass oc = (OWLClass) domains.iterator().next(); - if (oc.isOWLThing()) { - return new Thing(); - } else { - return new NamedClass(oc.toStringID()); - } - } + @Override + public Map<Individual, SortedSet<Individual>> getPropertyMembersImpl(ObjectProperty atomicRole) { + OWLObjectProperty prop = OWLAPIConverter.getOWLAPIObjectProperty(atomicRole); + Map<Individual, SortedSet<Individual>> map = new TreeMap<Individual, SortedSet<Individual>>(); + for (Individual i : individuals) { + OWLNamedIndividual ind = factory.getOWLNamedIndividual(IRI.create(i.getName())); - @Override - public Map<Individual, SortedSet<Individual>> getPropertyMembersImpl(ObjectProperty atomicRole) { - OWLObjectProperty prop = OWLAPIConverter.getOWLAPIObjectProperty(atomicRole); - Map<Individual, SortedSet<Individual>> map = new TreeMap<Individual, SortedSet<Individual>>(); - for (Individual i : individuals) { - OWLNamedIndividual ind = factory.getOWLNamedIndividual(IRI.create(i.getName())); + // get all related individuals via OWL API + Set<OWLNamedIndividual> inds; + try { + inds = reasoner.getObjectPropertyValues(ind, prop).getFlattened(); + } catch (Exception e) { + inds = null; + } - // get all related individuals via OWL API 
- Set<OWLNamedIndividual> inds = reasoner.getObjectPropertyValues(ind, prop).getFlattened(); + // convert data back to DL-Learner structures + SortedSet<Individual> is = new TreeSet<Individual>(); + try { + for (OWLNamedIndividual oi : inds) + is.add(new Individual(oi.toStringID())); + map.put(i, is); + } catch (Exception e) { + map = null; + } + } + return map; + } - // convert data back to DL-Learner structures - SortedSet<Individual> is = new TreeSet<Individual>(); - for (OWLNamedIndividual oi : inds) - is.add(new Individual(oi.toStringID())); - map.put(i, is); - } - return map; - } + @Override + protected Map<ObjectProperty, Set<Individual>> getObjectPropertyRelationshipsImpl( + Individual individual) { + OWLNamedIndividual ind = factory.getOWLNamedIndividual(IRI.create(individual.getName())); + Map<OWLObjectPropertyExpression, Set<OWLNamedIndividual>> mapAPI = new HashMap<OWLObjectPropertyExpression, Set<OWLNamedIndividual>>(); - @Override - protected Map<ObjectProperty, Set<Individual>> getObjectPropertyRelationshipsImpl(Individual individual) { - OWLNamedIndividual ind = factory.getOWLNamedIndividual(IRI.create(individual.getName())); - Map<OWLObjectPropertyExpression, Set<OWLNamedIndividual>> mapAPI = new HashMap<OWLObjectPropertyExpression, Set<OWLNamedIndividual>>(); + // Map<OWLObjectPropertyExpression, Set<OWLIndividual>> mapAPI = + // ind.getObjectPropertyValues(ontology); + // no method found in the new reasoner interface, so we have to ask the + // reasoner for each property in the ontology + for (OWLObjectProperty prop : ontology.getObjectPropertiesInSignature(true)) { + try { + mapAPI.put(prop, reasoner.getObjectPropertyValues(ind, prop).getFlattened()); + } catch (Exception e) { + mapAPI.put(prop, null); + } + } -// Map<OWLObjectPropertyExpression, Set<OWLIndividual>> mapAPI = ind.getObjectPropertyValues(ontology); - //no method found in the new reasoner interface, so we have to ask the reasoner for each property in the ontology - for (OWLObjectProperty prop : ontology.getObjectPropertiesInSignature(true)) { - mapAPI.put(prop, reasoner.getObjectPropertyValues(ind, prop).getFlattened()); - } + Map<ObjectProperty, Set<Individual>> map = new TreeMap<ObjectProperty, Set<Individual>>(); + for (Entry<OWLObjectPropertyExpression, Set<OWLNamedIndividual>> entry : mapAPI.entrySet()) { + ObjectProperty prop = OWLAPIConverter.convertObjectProperty(entry.getKey() + .asOWLObjectProperty()); + Set<Individual> inds = OWLAPIConverter.convertIndividuals(entry.getValue()); + map.put(prop, inds); + } + return map; + } - Map<ObjectProperty, Set<Individual>> map = new TreeMap<ObjectProperty, Set<Individual>>(); - for (Entry<OWLObjectPropertyExpression, Set<OWLNamedIndividual>> entry : mapAPI.entrySet()) { - ObjectProperty prop = OWLAPIConverter.convertObjectProperty(entry.getKey().asOWLObjectProperty()); - Set<Individual> inds = OWLAPIConverter.convertIndividuals(entry.getValue()); - map.put(prop, inds); - } - return map; - } + @Override + public Set<Individual> getRelatedIndividualsImpl(Individual individual, + ObjectProperty objectProperty) { + OWLNamedIndividual ind = factory.getOWLNamedIndividual(IRI.create(individual.getName())); + OWLObjectProperty prop = OWLAPIConverter.getOWLAPIObjectProperty(objectProperty); + Set<OWLNamedIndividual> inds = null; - @Override - public Set<Individual> getRelatedIndividualsImpl(Individual individual, ObjectProperty objectProperty) { - OWLNamedIndividual ind = factory.getOWLNamedIndividual(IRI.create(individual.getName())); - OWLObjectProperty prop = 
OWLAPIConverter.getOWLAPIObjectProperty(objectProperty); - Set<OWLNamedIndividual> inds = null; + try { + inds = reasoner.getObjectPropertyValues(ind, prop).getFlattened(); + } catch (Exception e) { + inds = null; + } - inds = reasoner.getObjectPropertyValues(ind, prop).getFlattened(); + // convert data back to DL-Learner structures + SortedSet<Individual> is = new TreeSet<Individual>(); + for (OWLNamedIndividual oi : inds) { + is.add(new Individual(oi.toStringID())); + } + return is; + } - // convert data back to DL-Learner structures - SortedSet<Individual> is = new TreeSet<Individual>(); - for (OWLNamedIndividual oi : inds) { - is.add(new Individual(oi.toStringID())); - } - return is; - } + @Override + public Set<Constant> getRelatedValuesImpl(Individual individual, + DatatypeProperty datatypeProperty) { + OWLNamedIndividual ind = factory.getOWLNamedIndividual(IRI.create(individual.getName())); + OWLDataProperty prop = OWLAPIConverter.getOWLAPIDataProperty(datatypeProperty); + Set<OWLLiteral> constants = null; - @Override - public Set<Constant> getRelatedValuesImpl(Individual individual, DatatypeProperty datatypeProperty) { - OWLNamedIndividual ind = factory.getOWLNamedIndividual(IRI.create(individual.getName())); - OWLDataProperty prop = OWLAPIConverter.getOWLAPIDataProperty(datatypeProperty); - Set<OWLLiteral> constants = null; + constants = reasoner.getDataPropertyValues(ind, prop); - constants = reasoner.getDataPropertyValues(ind, prop); + return OWLAPIConverter.convertConstants(constants); + } - return OWLAPIConverter.convertConstants(constants); - } + public Map<Individual, SortedSet<Double>> getDoubleValues(DatatypeProperty datatypeProperty) { + OWLDataProperty prop = OWLAPIConverter.getOWLAPIDataProperty(datatypeProperty); + Map<Individual, SortedSet<Double>> map = new TreeMap<Individual, SortedSet<Double>>(); + for (Individual i : individuals) { + OWLNamedIndividual ind = factory.getOWLNamedIndividual(IRI.create(i.getName())); - public Map<Individual, SortedSet<Double>> getDoubleValues(DatatypeProperty datatypeProperty) { - OWLDataProperty prop = OWLAPIConverter.getOWLAPIDataProperty(datatypeProperty); - Map<Individual, SortedSet<Double>> map = new TreeMap<Individual, SortedSet<Double>>(); - for (Individual i : individuals) { - OWLNamedIndividual ind = factory.getOWLNamedIndividual(IRI.create(i.getName())); + // get all related individuals via OWL API + Set<OWLLiteral> inds = null; - // get all related individuals via OWL API - Set<OWLLiteral> inds = null; + inds = reasoner.getDataPropertyValues(ind, prop); - inds = reasoner.getDataPropertyValues(ind, prop); + // convert data back to DL-Learner structures + SortedSet<Double> is = new TreeSet<Double>(); + for (OWLLiteral oi : inds) { + Double d = Double.parseDouble(oi.getLiteral()); + is.add(d); + } + map.put(i, is); + } + return map; + } - // convert data back to DL-Learner structures - SortedSet<Double> is = new TreeSet<Double>(); - for (OWLLiteral oi : inds) { - Double d = Double.parseDouble(oi.getLiteral()); - is.add(d); - } - map.put(i, is); - } - return map; - } + @Override + public Map<Individual, SortedSet<Constant>> getDatatypeMembersImpl( + DatatypeProperty datatypeProperty) { + OWLDataProperty prop = OWLAPIConverter.getOWLAPIDataProperty(datatypeProperty); + Map<Individual, SortedSet<Constant>> map = new TreeMap<Individual, SortedSet<Constant>>(); + for (Individual i : individuals) { + OWLNamedIndividual ind = factory.getOWLNamedIndividual(IRI.create(i.getName())); - @Override - public Map<Individual, 
SortedSet<Constant>> getDatatypeMembersImpl(DatatypeProperty datatypeProperty) { - OWLDataProperty prop = OWLAPIConverter.getOWLAPIDataProperty(datatypeProperty); - Map<Individual, SortedSet<Constant>> map = new TreeMap<Individual, SortedSet<Constant>>(); - for (Individual i : individuals) { - OWLNamedIndividual ind = factory.getOWLNamedIndividual(IRI.create(i.getName())); + // get all related values via OWL API + Set<OWLLiteral> constants = null; - // get all related values via OWL API - Set<OWLLiteral> constants = null; + constants = reasoner.getDataPropertyValues(ind, prop); - constants = reasoner.getDataPropertyValues(ind, prop); + // convert data back to DL-Learner structures + SortedSet<Constant> is = new TreeSet<Constant>(); + for (OWLLiteral literal : constants) { + // for typed constants we have to figure out the correct + // data type and value + if (!literal.isRDFPlainLiteral()) { + Datatype dt = OWLAPIConverter.convertDatatype(literal.getDatatype()); + is.add(new TypedConstant(literal.getLiteral(), dt)); + // for untyped constants we have to figure out the value + // and language tag (if any) + } else { + if (literal.hasLang()) + is.add(new UntypedConstant(literal.getLiteral(), literal.getLa... [truncated message content] |
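The pattern running through every hunk of the revision above is the same: each call into the OWL API reasoner is wrapped so that an empty answer or a reasoner exception degrades to a harmless default (owl:Thing, or a null result) instead of aborting the run with one of the previously enumerated checked exceptions. A minimal sketch of that fallback idiom, assuming an OWL API 3 OWLReasoner and OWLDataFactory; the class and method names below are illustrative, not the committed code:

    import org.semanticweb.owlapi.model.OWLClass;
    import org.semanticweb.owlapi.model.OWLDataFactory;
    import org.semanticweb.owlapi.model.OWLObjectProperty;
    import org.semanticweb.owlapi.reasoner.NodeSet;
    import org.semanticweb.owlapi.reasoner.OWLReasoner;

    public class ReasonerFallbackSketch {
        private final OWLReasoner reasoner;
        private final OWLDataFactory factory;

        public ReasonerFallbackSketch(OWLReasoner reasoner, OWLDataFactory factory) {
            this.reasoner = reasoner;
            this.factory = factory;
        }

        // Ask the reasoner for the direct ranges of a property; on an empty
        // answer or any reasoner exception, degrade to owl:Thing instead of
        // propagating the failure to the caller.
        public OWLClass rangeOrThing(OWLObjectProperty property) {
            try {
                NodeSet<OWLClass> ranges = reasoner.getObjectPropertyRanges(property, true);
                if (ranges.isEmpty()) {
                    return factory.getOWLThing();
                }
                return ranges.iterator().next().getRepresentativeElement();
            } catch (Exception e) { // deliberately broad, mirroring the diff
                return factory.getOWLThing();
            }
        }
    }

The catch-all keeps long batch runs alive when a single entity trips up the reasoner, at the cost of silently mapping genuine errors to the trivially true owl:Thing.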
From: <lor...@us...> - 2012-10-05 13:29:33
|
Revision: 3855 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3855&view=rev Author: lorenz_b Date: 2012-10-05 13:29:20 +0000 (Fri, 05 Oct 2012) Log Message: ----------- Added new test to evaluate enrichment algorithms. Modified Paths: -------------- trunk/scripts/pom.xml trunk/scripts/src/main/java/org/dllearner/scripts/evaluation/EnrichmentEvaluation.java Added Paths: ----------- trunk/scripts/src/main/java/org/dllearner/scripts/evaluation/EnrichmentEvaluationMultithreaded.java Modified: trunk/scripts/pom.xml =================================================================== --- trunk/scripts/pom.xml 2012-10-05 13:27:40 UTC (rev 3854) +++ trunk/scripts/pom.xml 2012-10-05 13:29:20 UTC (rev 3855) @@ -20,10 +20,22 @@ <dependency> <groupId>org.dllearner</groupId> <artifactId>components-core</artifactId> + <exclusions> + <exclusion> + <artifactId>jena</artifactId> + <groupId>com.hp.hpl.jena</groupId> + </exclusion> + </exclusions> </dependency> <dependency> <groupId>org.dllearner</groupId> <artifactId>components-ext</artifactId> + <exclusions> + <exclusion> + <artifactId>jena</artifactId> + <groupId>com.hp.hpl.jena</groupId> + </exclusion> + </exclusions> </dependency> <dependency> <groupId>org.dllearner</groupId> @@ -44,6 +56,12 @@ <dependency> <groupId>com.dumontierlab</groupId> <artifactId>pdb2rdf-cli</artifactId> + <exclusions> + <exclusion> + <artifactId>jena</artifactId> + <groupId>com.hp.hpl.jena</groupId> + </exclusion> + </exclusions> </dependency> <dependency> <groupId>org.aksw.commons</groupId> Modified: trunk/scripts/src/main/java/org/dllearner/scripts/evaluation/EnrichmentEvaluation.java =================================================================== --- trunk/scripts/src/main/java/org/dllearner/scripts/evaluation/EnrichmentEvaluation.java 2012-10-05 13:27:40 UTC (rev 3854) +++ trunk/scripts/src/main/java/org/dllearner/scripts/evaluation/EnrichmentEvaluation.java 2012-10-05 13:29:20 UTC (rev 3855) @@ -180,6 +180,8 @@ private static Logger logger = Logger.getLogger(EnrichmentEvaluation.class); + private final int maxNrOfThreads = 1; + // max. number of attempts per algorithm and entity, because too many queries // in a short time could cause blocking by the endpoint private final int maxAttempts = 5; @@ -247,15 +249,15 @@ objectPropertyAlgorithms.add(InverseObjectPropertyAxiomLearner.class); dataPropertyAlgorithms = new LinkedList<Class<? extends AxiomLearningAlgorithm>>(); - dataPropertyAlgorithms.add(DisjointDataPropertyAxiomLearner.class); - dataPropertyAlgorithms.add(EquivalentDataPropertyAxiomLearner.class); - dataPropertyAlgorithms.add(FunctionalDataPropertyAxiomLearner.class); - dataPropertyAlgorithms.add(DataPropertyDomainAxiomLearner.class); - dataPropertyAlgorithms.add(DataPropertyRangeAxiomLearner.class); - dataPropertyAlgorithms.add(SubDataPropertyOfAxiomLearner.class); +// dataPropertyAlgorithms.add(FunctionalDataPropertyAxiomLearner.class); +// dataPropertyAlgorithms.add(DataPropertyDomainAxiomLearner.class); + dataPropertyAlgorithms.add(DataPropertyRangeAxiomLearner.class); +// dataPropertyAlgorithms.add(EquivalentDataPropertyAxiomLearner.class); +// dataPropertyAlgorithms.add(SubDataPropertyOfAxiomLearner.class); +// dataPropertyAlgorithms.add(DisjointDataPropertyAxiomLearner.class); classAlgorithms = new LinkedList<Class<?
extends LearningAlgorithm>>(); -// classAlgorithms.add(CELOE.class); + classAlgorithms.add(CELOE.class); classAlgorithms.add(DisjointClassesLearner.class); classAlgorithms.add(SimpleSubclassLearner.class); @@ -859,52 +861,54 @@ entities = dataProperties; } - ps.setString(1, algorithms.get(0).getAnnotation(ComponentAnn.class).name()); - ps.setDouble(2, threshold); - - //get all found axioms for specific axiom type - Set<String> foundAxioms = new TreeSet<String>(); - Map<String, Double> foundAndNotEntailedAxioms = new TreeMap<String, Double>(); - rs = ps.executeQuery(); - String axiom; - boolean entailed; - double score; - while(rs.next()){ - axiom = rs.getString(1); - entailed = rs.getBoolean(2); - score = rs.getDouble(3); + if(entities != null){ + ps.setString(1, algorithms.get(0).getAnnotation(ComponentAnn.class).name()); + ps.setDouble(2, threshold); - foundAxioms.add(axiom); - if(!entailed){ - foundAndNotEntailedAxioms.put(axiom, score); + //get all found axioms for specific axiom type + Set<String> foundAxioms = new TreeSet<String>(); + Map<String, Double> foundAndNotEntailedAxioms = new TreeMap<String, Double>(); + rs = ps.executeQuery(); + String axiom; + boolean entailed; + double score; + while(rs.next()){ + axiom = rs.getString(1); + entailed = rs.getBoolean(2); + score = rs.getDouble(3); + + foundAxioms.add(axiom); + if(!entailed){ + foundAndNotEntailedAxioms.put(axiom, score); + } } + + //get all axioms in the reference ontology for a specific axiom type + Set<String> relevantAxioms = getRelevantAxioms2(type, entities); + //compute the axioms which are in the reference ontology, but could not be computed by the learning algorithm + Set<String> missedAxioms = org.mindswap.pellet.utils.SetUtils.difference(relevantAxioms, foundAxioms); + //compute the additional found axioms which were not entailed + for(String relAxiom : relevantAxioms){ + foundAndNotEntailedAxioms.remove(relAxiom); + } + Set<String> additionalAxioms = foundAndNotEntailedAxioms.keySet(); + + int total = relevantAxioms.size(); + int found = total - missedAxioms.size(); + + table2. + append(type.getName()).append(" & "). + append( found + "/" + total ).append(" & "). + append(additionalAxioms.size()). + append(" & & & \\\\\n"); + System.out.println(type.getName() + ": " + found + "/" + total); + + + //write additional axioms with score into file + writeToDisk(type, foundAndNotEntailedAxioms); + //write missed axioms into file + writeToDisk(type, missedAxioms); } - - //get all axioms in the reference ontology for a specific axiom type - Set<String> relevantAxioms = getRelevantAxioms2(type, entities); - //compute the axioms which are in the reference ontology, but could not be computed by the learning algorithm - Set<String> missedAxioms = org.mindswap.pellet.utils.SetUtils.difference(relevantAxioms, foundAxioms); - //compute the additional found axioms which were not entailed - for(String relAxiom : relevantAxioms){ - foundAndNotEntailedAxioms.remove(relAxiom); - } - Set<String> additionalAxioms = foundAndNotEntailedAxioms.keySet(); - - int total = relevantAxioms.size(); - int found = total - missedAxioms.size(); - - table2. - append(type.getName()).append(" & "). - append( found + "/" + total ).append(" & "). - append(additionalAxioms.size()).
- append(" & & & \\\\\n"); - System.out.println(type.getName() + ": " + found + "/" + total); - - - //write additional axioms with score into file - writeToDisk(type, foundAndNotEntailedAxioms); - //write missed axioms into file - writeToDisk(type, missedAxioms); } table2.append("\\end{tabulary}"); Added: trunk/scripts/src/main/java/org/dllearner/scripts/evaluation/EnrichmentEvaluationMultithreaded.java =================================================================== --- trunk/scripts/src/main/java/org/dllearner/scripts/evaluation/EnrichmentEvaluationMultithreaded.java (rev 0) +++ trunk/scripts/src/main/java/org/dllearner/scripts/evaluation/EnrichmentEvaluationMultithreaded.java 2012-10-05 13:29:20 UTC (rev 3855) @@ -0,0 +1,1434 @@ +/** + * Copyright (C) 2007-2011, Jens Lehmann + * + * This file is part of DL-Learner. + * + * DL-Learner is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * DL-Learner is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + */ +package org.dllearner.scripts.evaluation; + +import static java.util.Arrays.asList; + +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; +import java.io.BufferedWriter; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.io.InputStream; +import java.lang.reflect.InvocationTargetException; +import java.net.MalformedURLException; +import java.net.SocketTimeoutException; +import java.net.URI; +import java.net.URL; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.ResultSetMetaData; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import java.util.SortedSet; +import java.util.TreeMap; +import java.util.TreeSet; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.prefs.Preferences; + +import joptsimple.OptionException; +import joptsimple.OptionParser; +import joptsimple.OptionSet; + +import org.apache.commons.compress.compressors.CompressorException; +import org.apache.commons.compress.compressors.CompressorInputStream; +import org.apache.commons.compress.compressors.CompressorStreamFactory; +import org.apache.log4j.ConsoleAppender; +import org.apache.log4j.FileAppender; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.apache.log4j.SimpleLayout; +import org.coode.owlapi.turtle.TurtleOntologyFormat; +import 
org.dllearner.algorithms.DisjointClassesLearner; +import org.dllearner.algorithms.SimpleSubclassLearner; +import org.dllearner.algorithms.celoe.CELOE; +import org.dllearner.algorithms.properties.AsymmetricObjectPropertyAxiomLearner; +import org.dllearner.algorithms.properties.DataPropertyDomainAxiomLearner; +import org.dllearner.algorithms.properties.DataPropertyRangeAxiomLearner; +import org.dllearner.algorithms.properties.DisjointDataPropertyAxiomLearner; +import org.dllearner.algorithms.properties.DisjointObjectPropertyAxiomLearner; +import org.dllearner.algorithms.properties.EquivalentDataPropertyAxiomLearner; +import org.dllearner.algorithms.properties.EquivalentObjectPropertyAxiomLearner; +import org.dllearner.algorithms.properties.FunctionalDataPropertyAxiomLearner; +import org.dllearner.algorithms.properties.FunctionalObjectPropertyAxiomLearner; +import org.dllearner.algorithms.properties.InverseFunctionalObjectPropertyAxiomLearner; +import org.dllearner.algorithms.properties.InverseObjectPropertyAxiomLearner; +import org.dllearner.algorithms.properties.IrreflexiveObjectPropertyAxiomLearner; +import org.dllearner.algorithms.properties.ObjectPropertyDomainAxiomLearner; +import org.dllearner.algorithms.properties.ObjectPropertyRangeAxiomLearner; +import org.dllearner.algorithms.properties.ReflexiveObjectPropertyAxiomLearner; +import org.dllearner.algorithms.properties.SubDataPropertyOfAxiomLearner; +import org.dllearner.algorithms.properties.SubObjectPropertyOfAxiomLearner; +import org.dllearner.algorithms.properties.SymmetricObjectPropertyAxiomLearner; +import org.dllearner.algorithms.properties.TransitiveObjectPropertyAxiomLearner; +import org.dllearner.core.AbstractAxiomLearningAlgorithm; +import org.dllearner.core.AbstractReasonerComponent; +import org.dllearner.core.AnnComponentManager; +import org.dllearner.core.AxiomLearningAlgorithm; +import org.dllearner.core.ComponentAnn; +import org.dllearner.core.ComponentInitException; +import org.dllearner.core.ComponentManager; +import org.dllearner.core.EvaluatedAxiom; +import org.dllearner.core.EvaluatedDescription; +import org.dllearner.core.LearningAlgorithm; +import org.dllearner.core.LearningProblemUnsupportedException; +import org.dllearner.core.Score; +import org.dllearner.core.config.ConfigHelper; +import org.dllearner.core.owl.Axiom; +import org.dllearner.core.owl.DatatypeProperty; +import org.dllearner.core.owl.Entity; +import org.dllearner.core.owl.EquivalentClassesAxiom; +import org.dllearner.core.owl.Individual; +import org.dllearner.core.owl.NamedClass; +import org.dllearner.core.owl.ObjectProperty; +import org.dllearner.core.owl.SubClassAxiom; +import org.dllearner.kb.SparqlEndpointKS; +import org.dllearner.kb.sparql.ExtractionDBCache; +import org.dllearner.kb.sparql.SPARQLTasks; +import org.dllearner.kb.sparql.SparqlEndpoint; +import org.dllearner.kb.sparql.SparqlKnowledgeSource; +import org.dllearner.learningproblems.ClassLearningProblem; +import org.dllearner.learningproblems.Heuristics.HeuristicType; +import org.dllearner.reasoning.FastInstanceChecker; +import org.dllearner.reasoning.SPARQLReasoner; +import org.dllearner.utilities.CommonPrefixMap; +import org.dllearner.utilities.Files; +import org.dllearner.utilities.Helper; +import org.dllearner.utilities.datastructures.Datastructures; +import org.dllearner.utilities.datastructures.SetManipulation; +import org.dllearner.utilities.datastructures.SortedSetTuple; +import org.dllearner.utilities.examples.AutomaticNegativeExampleFinderSPARQL2; +import 
org.dllearner.utilities.owl.DLLearnerAxiomConvertVisitor; +import org.dllearner.utilities.owl.OWLAPIAxiomConvertVisitor; +import org.dllearner.utilities.owl.OWLAPIConverter; +import org.ini4j.IniPreferences; +import org.ini4j.InvalidFileFormatException; +import org.semanticweb.owlapi.apibinding.OWLManager; +import org.semanticweb.owlapi.io.RDFXMLOntologyFormat; +import org.semanticweb.owlapi.model.AxiomType; +import org.semanticweb.owlapi.model.IRI; +import org.semanticweb.owlapi.model.OWLAxiom; +import org.semanticweb.owlapi.model.OWLClass; +import org.semanticweb.owlapi.model.OWLDataFactory; +import org.semanticweb.owlapi.model.OWLDataProperty; +import org.semanticweb.owlapi.model.OWLDataPropertyAxiom; +import org.semanticweb.owlapi.model.OWLDisjointClassesAxiom; +import org.semanticweb.owlapi.model.OWLEntity; +import org.semanticweb.owlapi.model.OWLObjectProperty; +import org.semanticweb.owlapi.model.OWLObjectPropertyAxiom; +import org.semanticweb.owlapi.model.OWLOntology; +import org.semanticweb.owlapi.model.OWLOntologyCreationException; +import org.semanticweb.owlapi.model.OWLOntologyManager; +import org.semanticweb.owlapi.model.OWLOntologyStorageException; +import org.semanticweb.owlapi.model.OWLSubClassOfAxiom; +import org.semanticweb.owlapi.reasoner.InconsistentOntologyException; +import org.semanticweb.owlapi.reasoner.InferenceType; +import org.semanticweb.owlapi.reasoner.OWLReasoner; +import org.semanticweb.owlapi.reasoner.ReasonerInterruptedException; +import org.semanticweb.owlapi.reasoner.TimeOutException; +import org.semanticweb.owlapi.util.DefaultPrefixManager; + +import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl; + +import com.clarkparsia.pellet.owlapiv3.PelletReasonerFactory; +import com.hp.hpl.jena.query.QueryExecution; +import com.hp.hpl.jena.query.QueryExecutionFactory; +import com.hp.hpl.jena.query.QueryFactory; +import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.rdf.model.ModelFactory; + +/** + * Evaluation of enrichment algorithms on DBpedia (Live). + * + * @author Jens Lehmann + * + */ +public class EnrichmentEvaluationMultithreaded { + + private static Logger logger = Logger.getLogger(EnrichmentEvaluationMultithreaded.class); + + private final int maxNrOfThreads = 4; + + // max. number of attempts per algorithm and entity, because too many queries + // in a short time could cause blocking by the endpoint + private final int maxAttempts = 5; + //after 2 attempts we force the iterative SPARQL 1.0 mode + private final int nrOfAttemptsBeforeForceToSPARQL1_0_Mode = 2; + + //delay between 2 attempts + private final int delayInMilliseconds = 5000; + + // max. execution time for each learner for each entity + private int maxExecutionTimeInSeconds = 25; + + // number of axioms which will be learned/considered (only applies to + // some learners) + private int nrOfAxiomsToLearn = 50; + + // only axioms with a score above this threshold will be considered + private double threshold = 0.7; + + private SparqlEndpoint endpoint; + + // can be used to only evaluate a part of DBpedia + private int maxObjectProperties = 0; + private int maxDataProperties = 0; + private int maxClasses = 0; + private List<Class<? extends AxiomLearningAlgorithm>> objectPropertyAlgorithms; + private List<Class<? extends AxiomLearningAlgorithm>> dataPropertyAlgorithms; + private List<Class<?
extends LearningAlgorithm>> classAlgorithms; + + private String baseURI = "http://dbpedia.org/resource/"; + private Map<String,String> prefixes = new CommonPrefixMap(); + + private Connection conn; + private PreparedStatement ps; + + private OWLOntology dbPediaOntology; + private OWLReasoner reasoner; + private OWLDataFactory factory = new OWLDataFactoryImpl(); + + private static final String NAMESPACE = "http://dbpedia.org/ontology"; + + private SPARQLReasoner sparqlReasoner; + + private Map<Class<? extends LearningAlgorithm>, Set<OWLAxiom>> algorithm2Ontology; + private OWLOntologyManager manager; + + public EnrichmentEvaluationMultithreaded(SparqlEndpoint endpoint) { + this.endpoint = endpoint; + + prefixes = new HashMap<String,String>(); + prefixes.put("dbp","http://dbpedia.org/property/"); + prefixes.put("dbo","http://dbpedia.org/ontology/"); + prefixes.put("yago", "http://dbpedia.org/class/"); + + objectPropertyAlgorithms = new LinkedList<Class<? extends AxiomLearningAlgorithm>>(); + objectPropertyAlgorithms.add(DisjointObjectPropertyAxiomLearner.class); + objectPropertyAlgorithms.add(EquivalentObjectPropertyAxiomLearner.class); + objectPropertyAlgorithms.add(SubObjectPropertyOfAxiomLearner.class); + objectPropertyAlgorithms.add(FunctionalObjectPropertyAxiomLearner.class); + objectPropertyAlgorithms.add(InverseFunctionalObjectPropertyAxiomLearner.class); + objectPropertyAlgorithms.add(ObjectPropertyDomainAxiomLearner.class); + objectPropertyAlgorithms.add(ObjectPropertyRangeAxiomLearner.class); + objectPropertyAlgorithms.add(SymmetricObjectPropertyAxiomLearner.class); + objectPropertyAlgorithms.add(AsymmetricObjectPropertyAxiomLearner.class); + objectPropertyAlgorithms.add(TransitiveObjectPropertyAxiomLearner.class); + objectPropertyAlgorithms.add(IrreflexiveObjectPropertyAxiomLearner.class); + objectPropertyAlgorithms.add(ReflexiveObjectPropertyAxiomLearner.class); + objectPropertyAlgorithms.add(InverseObjectPropertyAxiomLearner.class); + + dataPropertyAlgorithms = new LinkedList<Class<? extends AxiomLearningAlgorithm>>(); + dataPropertyAlgorithms.add(FunctionalDataPropertyAxiomLearner.class); + dataPropertyAlgorithms.add(DataPropertyDomainAxiomLearner.class); + dataPropertyAlgorithms.add(DataPropertyRangeAxiomLearner.class); + dataPropertyAlgorithms.add(EquivalentDataPropertyAxiomLearner.class); + dataPropertyAlgorithms.add(SubDataPropertyOfAxiomLearner.class); + dataPropertyAlgorithms.add(DisjointDataPropertyAxiomLearner.class); + + classAlgorithms = new LinkedList<Class<? extends LearningAlgorithm>>(); + classAlgorithms.add(CELOE.class); + classAlgorithms.add(DisjointClassesLearner.class); + classAlgorithms.add(SimpleSubclassLearner.class); + + algorithm2Ontology = new HashMap<Class<? 
extends LearningAlgorithm>, Set<OWLAxiom>>(); + manager = OWLManager.createOWLOntologyManager(); + + initDBConnection(); + loadCurrentDBpediaOntology2(); + } + + private void initDBConnection() { + try { + String iniFile = "db_settings.ini"; + Preferences prefs = new IniPreferences(new FileReader(iniFile)); + String dbServer = prefs.node("database").get("server", null); + String dbName = prefs.node("database").get("name", null); + String dbUser = prefs.node("database").get("user", null); + String dbPass = prefs.node("database").get("pass", null); + + Class.forName("com.mysql.jdbc.Driver"); + String url = "jdbc:mysql://" + dbServer + "/" + dbName; + conn = DriverManager.getConnection(url, dbUser, dbPass); + + ps = conn.prepareStatement("INSERT INTO evaluation (" + + "entity, algorithm, axiom, score, runtime_ms, entailed ) " + "VALUES(?,?,?,?,?,?)"); + + } catch (ClassNotFoundException e) { + e.printStackTrace(); + } catch (SQLException e) { + e.printStackTrace(); + } catch (InvalidFileFormatException e) { + e.printStackTrace(); + } catch (FileNotFoundException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } + } + + public void dropAndCreateTable(){ + try { + Statement s = conn.createStatement(); + s.executeUpdate("DROP TABLE IF EXISTS evaluation"); + s.executeUpdate("CREATE TABLE evaluation (" + + "id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY," + + "entity VARCHAR(200), algorithm VARCHAR(100), axiom VARCHAR(500), score DOUBLE, runtime_ms INT(20), entailed BOOLEAN)"); + s.close(); + + } catch (SQLException e) { + e.printStackTrace(); + } + } + + private void writeToDB(String entity, String algorithm, String axiom, double score, long runTime, boolean entailed) { + try { + ps.setString(1, entity); + ps.setString(2, algorithm); + ps.setString(3, axiom); + ps.setDouble(4, score); + ps.setLong(5, runTime); + ps.setBoolean(6, entailed); + + ps.executeUpdate(); + } catch (SQLException e) { + logger.error("Error while writing to DB.", e); + e.printStackTrace(); + } + + } + + public void start(boolean runClassAlgorithms, boolean runObjectPropertyAlgorithms, boolean runDataPropertyAlgorithms) throws IllegalArgumentException, SecurityException, InstantiationException, + IllegalAccessException, InvocationTargetException, NoSuchMethodException, + ComponentInitException, InterruptedException { + + long overallStartTime = System.currentTimeMillis(); + + SparqlEndpointKS ks = new SparqlEndpointKS(endpoint); + ks.init(); + + sparqlReasoner = new SPARQLReasoner(ks); + sparqlReasoner.setCache(new ExtractionDBCache("cache")); + sparqlReasoner.setUseCache(true); + sparqlReasoner.prepareSubsumptionHierarchy(); + sparqlReasoner.precomputePopularity(); + + if(runClassAlgorithms){ + evaluateClasses(ks); + Thread.sleep(20000); + } + + if(runObjectPropertyAlgorithms){ + evaluateObjectProperties(ks); + Thread.sleep(20000); + } + + if(runDataPropertyAlgorithms){ + evaluateDataProperties(ks); + } + + logger.info("Overall runtime: " + (System.currentTimeMillis()-overallStartTime)/1000 + "s."); + + } + + private void evaluateObjectProperties(final SparqlEndpointKS ks)throws IllegalArgumentException, SecurityException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException, ComponentInitException, InterruptedException{ + Set<ObjectProperty> properties = new SPARQLTasks(ks.getEndpoint()).getAllObjectProperties(); + logger.info("Evaluating " + properties.size() + " object properties..."); + + for (final Class<? 
extends AxiomLearningAlgorithm> algorithmClass : objectPropertyAlgorithms) { + Thread.sleep(5000); + + Set<OWLAxiom> axioms = new HashSet<OWLAxiom>(); + algorithm2Ontology.put(algorithmClass, axioms); + int propCnt = 0; + ExecutorService threadPool = Executors.newFixedThreadPool(maxNrOfThreads); + for (final ObjectProperty property : properties) { + + threadPool.execute(new Runnable() { + + @Override + public void run() { + String algName = ""; + try { + AxiomLearningAlgorithm learner = algorithmClass.getConstructor(SparqlEndpointKS.class).newInstance( + ks); + ((AbstractAxiomLearningAlgorithm) learner).setReasoner(sparqlReasoner); + ((AbstractAxiomLearningAlgorithm) learner).addFilterNamespace(NAMESPACE); + ConfigHelper.configure(learner, "propertyToDescribe", property.toString()); + ConfigHelper.configure(learner, "maxExecutionTimeInSeconds", maxExecutionTimeInSeconds); + learner.init(); + algName = AnnComponentManager.getName(learner); + + boolean emptyEntity = sparqlReasoner.getPopularity(property) == 0; + if (emptyEntity) { + logger.warn("Empty entity: " + property); + } + + if (emptyEntity) { + writeToDB(property.toManchesterSyntaxString(baseURI, prefixes), algName, "EMPTY_ENTITY", 0, 0, + false); + } else { + applyLearningAlgorithm(learner, property); + + } + + } catch (Exception e) { + logger.error("Error occurred for object property " + property.getName() + " with algorithm " + + algName, e); + } + } + }); + + propCnt++; + if (maxObjectProperties != 0 && propCnt == maxObjectProperties) { + break; + } + + } + threadPool.shutdown(); + while (!threadPool.isTerminated()) { + + } + } + } + + private void applyLearningAlgorithm(AxiomLearningAlgorithm algorithm, Entity entity){ + int attempt = 0; + long startTime = 0; + boolean timeout = true; + String algName = AnnComponentManager.getName(algorithm); + while(((AbstractAxiomLearningAlgorithm)algorithm).isTimeout() && attempt++ < maxAttempts){ + if(attempt > 1){ + try { + logger.warn("Got timeout in " + algName + " for entity " + entity.getName() + ". Waiting " + delayInMilliseconds + " ms ..."); + Thread.sleep(delayInMilliseconds); + } catch (InterruptedException e) { + e.printStackTrace(); + } + } + logger.info("Applying " + algName + " on " + entity.toString() + " ... 
(Attempt " + attempt + ")"); + startTime = System.currentTimeMillis(); + try { + ((AbstractAxiomLearningAlgorithm)algorithm).setForceSPARQL_1_0_Mode(attempt > nrOfAttemptsBeforeForceToSPARQL1_0_Mode); + algorithm.start(); + timeout = ((AbstractAxiomLearningAlgorithm)algorithm).isTimeout(); + } catch (Exception e) { + if(e.getCause() instanceof SocketTimeoutException){ + + } else { + e.printStackTrace(); + } + } + } + + long runTime = System.currentTimeMillis() - startTime; + List<EvaluatedAxiom> learnedAxioms = algorithm + .getCurrentlyBestEvaluatedAxioms(nrOfAxiomsToLearn); + + if(timeout && learnedAxioms.isEmpty()){ + writeToDB(entity.toManchesterSyntaxString(baseURI, prefixes), algName, "TIMEOUT", 0, runTime, false); + } else if (learnedAxioms == null || learnedAxioms.isEmpty()) { + writeToDB(entity.toManchesterSyntaxString(baseURI, prefixes), algName, "NULL", 0, runTime, false); + } else { + for (EvaluatedAxiom learnedAxiom : learnedAxioms) { + double score = learnedAxiom.getScore().getAccuracy(); + if (Double.isNaN(score)) { + score = -1; + } + writeToDB(entity.toManchesterSyntaxString(baseURI, prefixes) .toString(), algName, learnedAxiom.getAxiom().toManchesterSyntaxString(baseURI, prefixes), + score, runTime, isEntailed(learnedAxiom)); + if(score >= threshold){ + algorithm2Ontology.get(algorithm.getClass()).add(OWLAPIAxiomConvertVisitor.convertAxiom(learnedAxiom.getAxiom())); + } + } + } + + } + + private void evaluateDataProperties(final SparqlEndpointKS ks) throws IllegalArgumentException, SecurityException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException, ComponentInitException, InterruptedException{ + Set<DatatypeProperty> properties = new SPARQLTasks(ks.getEndpoint()).getAllDataProperties(); + logger.info("Evaluating " + properties.size() + " data properties..."); + for (final Class<? 
extends AxiomLearningAlgorithm> algorithmClass : dataPropertyAlgorithms) { + Thread.sleep(5000); + int propCnt = 0; + + Set<OWLAxiom> axioms = new HashSet<OWLAxiom>(); + algorithm2Ontology.put(algorithmClass, axioms); + + ExecutorService threadPool = Executors.newFixedThreadPool(maxNrOfThreads); + for (final DatatypeProperty property : properties) { + + threadPool.execute(new Runnable() { + + @Override + public void run() { + String algName = ""; + try { + AxiomLearningAlgorithm learner = algorithmClass.getConstructor(SparqlEndpointKS.class).newInstance( + ks); + ((AbstractAxiomLearningAlgorithm) learner).setReasoner(sparqlReasoner); + ((AbstractAxiomLearningAlgorithm) learner).addFilterNamespace(NAMESPACE); + ConfigHelper.configure(learner, "propertyToDescribe", property.toString()); + ConfigHelper.configure(learner, "maxExecutionTimeInSeconds", maxExecutionTimeInSeconds); + learner.init(); + algName = AnnComponentManager.getName(learner); + + boolean emptyEntity = sparqlReasoner.getPopularity(property) == 0; + if (emptyEntity) { + logger.warn("Empty entity: " + property); + } + + if (emptyEntity) { + writeToDB(property.toManchesterSyntaxString(baseURI, prefixes), algName, "EMPTY_ENTITY", 0, 0, + false); + } else { + applyLearningAlgorithm(learner, property); + + } + + } catch (Exception e) { + logger.error("Error occurred for data property " + property.getName() + " with algorithm " + + algName, e); + } + } + }); + + propCnt++; + if (maxDataProperties != 0 && propCnt == maxDataProperties) { + break; + } + + } + threadPool.shutdown(); + while (!threadPool.isTerminated()) { + + } + } + } + + private void evaluateClasses(final SparqlEndpointKS ks) throws IllegalArgumentException, SecurityException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException, ComponentInitException, InterruptedException{ + Set<NamedClass> classes = new SPARQLTasks(ks.getEndpoint()).getAllClasses(); + logger.info("Evaluating " + classes.size() + " classes..."); + for (final Class<? extends LearningAlgorithm> algorithmClass : classAlgorithms) { + ExecutorService threadPool = null; + if(algorithmClass == CELOE.class){ + + } else { + threadPool = Executors.newFixedThreadPool(maxNrOfThreads); + } + int classesCnt = 0; + Thread.sleep(5000); + + Set<OWLAxiom> axioms = new HashSet<OWLAxiom>(); + algorithm2Ontology.put(algorithmClass, axioms); + + for (final NamedClass cls : classes) { + try{ + String algName = ""; + if(algorithmClass == CELOE.class){ + algName = CELOE.class.getAnnotation(ComponentAnn.class).name(); + } else { + LearningAlgorithm learner = algorithmClass.getConstructor( + SparqlEndpointKS.class).newInstance(ks); + algName = AnnComponentManager.getName(learner); + } + List<EvaluatedAxiom> learnedAxioms = new ArrayList<EvaluatedAxiom>(); + boolean emptyEntity = sparqlReasoner.getPopularity(cls) == 0; + if(emptyEntity){ + logger.warn("Empty entity: " + cls); + writeToDB(cls.toManchesterSyntaxString(baseURI, prefixes), algName, "EMPTY_ENTITY", 0, 0, false); + } else { + long startTime = System.currentTimeMillis(); + boolean timeout = false; + if(algorithmClass == CELOE.class){ + logger.info("Applying " + algName + " on " + cls + " ... 
"); + learnedAxioms = applyCELOE(ks, cls, false); + long runTime = System.currentTimeMillis() - startTime; + if(timeout && learnedAxioms.isEmpty()){ + writeToDB(cls.toManchesterSyntaxString(baseURI, prefixes), algName, "TIMEOUT", 0, runTime, false); + } else if (learnedAxioms == null || learnedAxioms.isEmpty()) { + writeToDB(cls.toManchesterSyntaxString(baseURI, prefixes), algName, "NULL", 0, runTime, false); + } else { + for (EvaluatedAxiom learnedAxiom : learnedAxioms) { + double score = learnedAxiom.getScore().getAccuracy(); + if (Double.isNaN(score)) { + score = -1; + } + writeToDB(cls.toManchesterSyntaxString(baseURI, prefixes) .toString(), algName, learnedAxiom.getAxiom().toManchesterSyntaxString(baseURI, prefixes), + score, runTime, isEntailed(learnedAxiom)); + } + } + } else { + threadPool.execute(new Runnable() { + + @Override + public void run() { + String algName = ""; + try { + LearningAlgorithm learner = algorithmClass.getConstructor( + SparqlEndpointKS.class).newInstance(ks); + algName = AnnComponentManager.getName(learner); + ((AbstractAxiomLearningAlgorithm)learner).setReasoner(sparqlReasoner); + ConfigHelper.configure(learner, "classToDescribe", cls.toString()); + ConfigHelper.configure(learner, "maxExecutionTimeInSeconds", + maxExecutionTimeInSeconds); + learner.init(); + applyLearningAlgorithm((AxiomLearningAlgorithm) learner, cls); + } catch (Exception e) { + logger.error("Error occured for class " + cls.getName() + " with algorithm " + + algName, e); + } + } + }); + + } + + } + + classesCnt++; + if (maxClasses != 0 && classesCnt == maxClasses) { + break; + } + + } catch(Exception e){ + logger.error("Error occured for class " + cls.getName(), e); + } + } + if(algorithmClass != CELOE.class){ + threadPool.shutdown(); + while (!threadPool.isTerminated()) { + + } + } + } + } + + private List<EvaluatedAxiom> applyCELOE(SparqlEndpointKS ks, NamedClass nc, boolean equivalence) throws ComponentInitException { + // get instances of class as positive examples + SPARQLReasoner sr = new SPARQLReasoner(ks); + SortedSet<Individual> posExamples = sr.getIndividuals(nc, 20); + SortedSet<String> posExStr = Helper.getStringSet(posExamples); + + // get negative examples via various strategies + System.out.print("finding negatives ... "); + AutomaticNegativeExampleFinderSPARQL2 finder = new AutomaticNegativeExampleFinderSPARQL2(ks.getEndpoint()); + SortedSet<String> negExStr = finder.getNegativeExamples(nc.getName(), posExStr); + negExStr = SetManipulation.fuzzyShrink(negExStr, 20); + SortedSet<Individual> negExamples = Helper.getIndividualSet(negExStr); + SortedSetTuple<Individual> examples = new SortedSetTuple<Individual>(posExamples, negExamples); + + System.out.println("done (" + negExStr.size()+ ")"); + + ComponentManager cm = ComponentManager.getInstance(); + + SparqlKnowledgeSource ks2 = cm.knowledgeSource(SparqlKnowledgeSource.class); + ks2.setInstances(Datastructures.individualSetToStringSet(examples.getCompleteSet())); + ks2.setUrl(ks.getEndpoint().getURL()); + ks2.setDefaultGraphURIs(new TreeSet<String>(ks.getEndpoint().getDefaultGraphURIs())); + ks2.setUseLits(false); + ks2.setUseCacheDatabase(true); + ks2.setRecursionDepth(2); + ks2.setCloseAfterRecursion(true); +// ks2.getConfigurator().setSaveExtractedFragment(true); + System.out.println("getting fragment ... 
"); + ks2.init(); + System.out.println("done"); + + AbstractReasonerComponent rc = cm.reasoner(FastInstanceChecker.class, ks2); + rc.init(); + + // TODO: super class learning + ClassLearningProblem lp = cm.learningProblem(ClassLearningProblem.class, rc); +// lp.setPositiveExamples(posExamples); +// lp.setNegativeExamples(negExamples); +// try { + lp.setClassToDescribe(nc); +// } catch (MalformedURLException e1) { +// e1.printStackTrace(); +// } +// lp.setType("equivalence"); + lp.setEquivalence(true); +// lp.setAccuracyMethod("fmeasure"); + lp.setHeuristic(HeuristicType.FMEASURE); + lp.setUseApproximations(false); + lp.setMaxExecutionTimeInSeconds(10); + lp.init(); + + CELOE la = null; + try { + la = cm.learningAlgorithm(CELOE.class, lp, rc); + } catch (LearningProblemUnsupportedException e) { + e.printStackTrace(); + } +// CELOEConfigurator cc = la.getConfigurator(); + la.setMaxExecutionTimeInSeconds(10); + la.setNoisePercentage(25); + la.init(); + System.out.print("running CELOE ... "); + la.start(); + System.out.println("done"); + + // convert the result to axioms (to make it compatible with the other algorithms) + List<? extends EvaluatedDescription> learnedDescriptions = la.getCurrentlyBestEvaluatedDescriptions(threshold); + List<EvaluatedAxiom> evaluatedAxioms = new LinkedList<EvaluatedAxiom>(); + for(EvaluatedDescription learnedDescription : learnedDescriptions) { + Axiom axiom; + if(equivalence) { + axiom = new EquivalentClassesAxiom(nc, learnedDescription.getDescription()); + } else { + axiom = new SubClassAxiom(nc, learnedDescription.getDescription()); + } + Score score = lp.computeScore(learnedDescription.getDescription()); + evaluatedAxioms.add(new EvaluatedAxiom(axiom, score)); + } + + cm.freeAllComponents(); + return evaluatedAxioms; + } + + public void printResultsPlain() { + + } + + public void printResultsLaTeX() throws Exception{ + List<Class<? extends LearningAlgorithm>> algorithms = new ArrayList<Class<? extends LearningAlgorithm>>(); + algorithms.addAll(classAlgorithms); + algorithms.addAll(objectPropertyAlgorithms); + algorithms.addAll(dataPropertyAlgorithms); + + //create view which contains only entries without TIMEOUT and NULL + PreparedStatement ps = conn.prepareStatement("CREATE OR REPLACE VIEW evaluation_cleaned AS (SELECT * FROM evaluation WHERE axiom != ? AND axiom != ?)"); + ps.setString(1, "NULL"); + ps.setString(2, "TIMEOUT"); + ps.execute(); + + StringBuilder table1 = new StringBuilder(); + table1.append("\\begin{tabulary}{\\textwidth}{LRRRRR}\\toprule\n"); + table1.append(" algorithm & Avg. \\#suggestions & Avg. runtime in ms & timeout in \\% & Avg. score & Avg. maximum score\\\\\\midrule\n"); + + for(Class<? extends LearningAlgorithm> algo : algorithms){ + + String algoName = algo.getAnnotation(ComponentAnn.class).name(); + + //get number of entities + ps = conn.prepareStatement("SELECT COUNT(DISTINCT entity) FROM evaluation WHERE algorithm=?"); + ps.setString(1, algoName); + java.sql.ResultSet rs = ps.executeQuery(); + rs.next(); + int overallNumberOfEntities = rs.getInt(1); + + //get number of entities with empty result + ps = conn.prepareStatement("SELECT COUNT(DISTINCT entity) FROM evaluation WHERE algorithm=? AND axiom=?"); + ps.setString(1, algoName); + ps.setString(2, "NULL"); + rs = ps.executeQuery(); + rs.next(); + int numberOfEntitiesWithEmptyResult = rs.getInt(1); + + //get number of entities with timout + ps = conn.prepareStatement("SELECT COUNT(DISTINCT entity) FROM evaluation WHERE algorithm=? 
AND axiom=?"); + ps.setString(1, algoName); + ps.setString(2, "TIMEOUT"); + rs = ps.executeQuery(); + rs.next(); + int numberOfEntitiesWithTimeout = rs.getInt(1); + + //compute average number of suggestions above threshold + ps = conn.prepareStatement("SELECT AVG(cnt) FROM (SELECT entity, COUNT(DISTINCT axiom) AS cnt FROM (SELECT * FROM evaluation WHERE algorithm=? AND score >=?) AS A GROUP BY entity) AS B"); + ps.setString(1, algoName); + ps.setDouble(2, threshold); + rs = ps.executeQuery(); + rs.next(); + double avgSuggestionsAboveThreshold = round(rs.getDouble(1)); + + //compute average runtime + ps = conn.prepareStatement("SELECT AVG(runtime) FROM (SELECT MAX(runtime_ms) AS runtime FROM evaluation WHERE algorithm=?) AS A"); + ps.setString(1, algoName); + rs = ps.executeQuery(); + rs.next(); + double avgRuntimeInMilliseconds = rs.getDouble(1); + + //compute ratio for complete timeouts + double timeoutRatio = round((double)numberOfEntitiesWithTimeout / overallNumberOfEntities); + + //compute avg. score + ps = conn.prepareStatement("SELECT AVG(avg) FROM (SELECT AVG(score) AS avg FROM evaluation_cleaned WHERE algorithm=? AND score >= ? GROUP BY entity) AS A"); + ps.setString(1, algoName); + ps.setDouble(2, threshold); + rs = ps.executeQuery(); + rs.next(); + double avgScore = round(rs.getDouble(1)); + + //compute avg. max. score + ps = conn.prepareStatement("SELECT AVG(max) FROM (SELECT MAX(score) AS max FROM evaluation_cleaned WHERE algorithm=? AND score>=? GROUP BY entity) AS A"); + ps.setString(1, algoName); + ps.setDouble(2, threshold); + rs = ps.executeQuery(); + rs.next(); + double avgMaxScore = round(rs.getDouble(1)); + + table1. + append(algoName.replace("axiom learner", "").trim()).append(" & "). + append(avgSuggestionsAboveThreshold).append(" & "). + append(avgRuntimeInMilliseconds).append(" & "). + append(timeoutRatio).append(" & "). + append(avgScore).append(" & "). + append(avgMaxScore). + append("\\\\\n"); + + } + table1.append("\\bottomrule\n\\end{tabulary}"); + System.out.println(table1.toString()); + write2Disk(table1.toString(), "evaluation/table1.tex"); + + + //second part of evaluation + + StringBuilder table2 = new StringBuilder(); + table2.append("\\begin{tabulary}{\\textwidth}{LCCCCC}\\toprule\n"); + table2.append("& & & \\multicolumn{3}{c}{Estimated precision} \\\\\n"); + table2.append(" axiom type & recall & additional axioms & no & maybe & yes \\\\\\midrule\n"); + + //get all axiomtypes and corresponding algorithm + Map<AxiomType<? extends OWLAxiom>, List<Class<? 
extends LearningAlgorithm>>> axiomType2Algorithm = getAxiomTypesWithLearningAlgorithms(); + + // get all entities in database because we compute recall only for axioms of entities which we have tested + // we use only entities for which triples in the endpoint are contained + java.sql.ResultSet rs = conn.prepareStatement("SELECT DISTINCT entity FROM evaluation WHERE axiom != 'EMPTY_ENTITY'").executeQuery(); + Set<OWLEntity> allEntities = new HashSet<OWLEntity>(); + Set<OWLEntity> classes = new HashSet<OWLEntity>(); + Set<OWLEntity> objectProperties = new HashSet<OWLEntity>(); + Set<OWLEntity> dataProperties = new HashSet<OWLEntity>(); + IRI iri; + while(rs.next()){ + iri = IRI.create("http://dbpedia.org/ontology/" + rs.getString(1).substring(4)); + if(dbPediaOntology.containsClassInSignature(iri)){ + allEntities.add(factory.getOWLClass(iri)); + classes.add(factory.getOWLClass(iri)); + } else if(dbPediaOntology.containsObjectPropertyInSignature(iri)){ + allEntities.add(factory.getOWLObjectProperty(iri)); + objectProperties.add(factory.getOWLObjectProperty(iri)); + } else if(dbPediaOntology.containsDataPropertyInSignature(iri)){ + allEntities.add(factory.getOWLDataProperty(iri)); + dataProperties.add(factory.getOWLDataProperty(iri)); + } + } + + + + //compute recall for each axiom type + ps = conn.prepareStatement("SELECT axiom, entailed, score FROM evaluation WHERE algorithm=? AND score>=0 AND entity=?"); + Set<OWLEntity> entities = null; + for(Entry<AxiomType<? extends OWLAxiom>, List<Class<? extends LearningAlgorithm>>> entry : axiomType2Algorithm.entrySet()){ + AxiomType<? extends OWLAxiom> type = entry.getKey(); + algorithms = entry.getValue(); + entities = null; + if(classAlgorithms.containsAll(algorithms)){ + entities = classes; + } else if(objectPropertyAlgorithms.containsAll(algorithms)){ + entities = objectProperties; + } else if(dataPropertyAlgorithms.containsAll(algorithms)){ + entities = dataProperties; + } + + + DefaultPrefixManager pm = new DefaultPrefixManager(); + pm.setPrefix("dbo:", "http://dbpedia.org/ontology/"); + + Set<String> missedAxioms = new TreeSet<String>(); + Set<String> additionalAxioms = new TreeSet<String>(); + Map<String, Double> foundAndNotEntailedAxioms = new TreeMap<String, Double>(); + + if(entities != null){ + //write learned axioms in separate TTL file + new File("evaluation/ontologies").mkdirs(); + OWLOntology ontology = manager.createOntology(IRI.create("http://dl-learner.org/ontologies/" + type.getName() + ".owl")); + if(algorithm2Ontology.containsKey(algorithms.get(0))){ + manager.addAxioms(ontology, algorithm2Ontology.get(algorithms.get(0))); + manager.saveOntology(ontology, new TurtleOntologyFormat(), new FileOutputStream(new File("evaluation/ontologies/" + type.getName() + ".ttl"))); + } + + for(OWLEntity entity : entities){ + Map<String, Double> axiom2Score = new HashMap<String, Double>(); + ps.setString(1, algorithms.get(0).getAnnotation(ComponentAnn.class).name()); +// ps.setDouble(2, threshold); + ps.setString(2, pm.getShortForm(entity)); + + //get all found axioms for specific axiom type + Set<String> foundAxioms = new TreeSet<String>(); + Map<String, Double> foundAndNotEntailedAxiomsTmp = new TreeMap<String, Double>(); + rs = ps.executeQuery(); + String axiom; + boolean entailed; + double score; + boolean emptyEntity = false; + while(rs.next()){ + axiom = rs.getString(1); + if(axiom.equalsIgnoreCase("empty_entity")){ + emptyEntity = true; + } + entailed = rs.getBoolean(2); + score = rs.getDouble(3); + if(!emptyEntity){ + 
if(score>=threshold){ + foundAxioms.add(axiom); + if(!entailed){ + foundAndNotEntailedAxiomsTmp.put(axiom, score); + } + } else { + axiom2Score.put(axiom, score); + } + + } + } + + //get all axioms in the reference ontology for a specific axiom type + Set<String> relevantAxioms = getRelevantAxioms2(type, Collections.singleton(entity)); + //compute the axioms which are in the reference ontology, but could not be computed by the learning algorithm + Set<String> missedAxiomsTmp = org.mindswap.pellet.utils.SetUtils.difference(relevantAxioms, foundAxioms); + + Set<String> tmp = new TreeSet<String>(); + for(String ax : missedAxiomsTmp){ + if(emptyEntity){ + tmp.add(ax + "\t(EMPTY_ENTITY)"); + } else if(axiom2Score.containsKey(ax)){ + tmp.add(ax + "\t(" + axiom2Score.get(ax) + ")"); + } else { + tmp.add(ax); + } + } + missedAxiomsTmp = tmp; + + missedAxioms.addAll(missedAxiomsTmp); + //compute the additional found axioms which were not entailed + for(String relAxiom : relevantAxioms){ + foundAndNotEntailedAxiomsTmp.remove(relAxiom); + } + Set<String> additionalAxiomsTmp = foundAndNotEntailedAxiomsTmp.keySet(); + additionalAxioms.addAll(additionalAxiomsTmp); + foundAndNotEntailedAxioms.putAll(foundAndNotEntailedAxiomsTmp); + } + + + + + int total = getRelevantAxioms2(type, entities).size(); + int found = total - missedAxioms.size(); + + table2. + append(type.getName()).append(" & "). + append( found + "/" + total ).append(" & "). + append(additionalAxioms.size()). + append(" & & & \\\\\n"); + System.out.println(type.getName() + ": " + found + "/" + total); + + + //write additional axioms with score into file + writeToDisk(type, foundAndNotEntailedAxioms); + //write missed axioms into file + writeToDisk(type, missedAxioms); + } + } + + table2.append("\\end{tabulary}"); + System.out.println(table2.toString()); + write2Disk(table2.toString(), "evaluation/table2.tex"); + } + + private void writeToDisk(AxiomType<? extends OWLAxiom> axiomType, Map<String, Double> axiomsWithAccuracy){ + String fileName = axiomType.getName().replaceAll(" ", "_") + ".txt"; + + BufferedWriter out = null; + try { + File dir = new File("evaluation/additional"); + if(!dir.exists()){ + dir.mkdirs(); + } + + File file = new File(dir + File.separator + fileName); + if(!file.exists()){ + file.createNewFile(); + } + out = new BufferedWriter(new FileWriter(file)); + + //sort by values and write only the first 100 + int i = 0; + for(Entry<String, Double> entry : sortByValues(axiomsWithAccuracy)){ + i++; + out.write(entry.getKey() + " (" + round(entry.getValue() * 100) + "%)"); + out.newLine(); + if(i == 100){ + break; + } + } + } catch (IOException e) { + e.printStackTrace(); + } finally { + if(out != null){ + try { + out.close(); + } catch (IOException e) { + e.printStackTrace(); + } + } + } + } + + private void write2Disk(String content, String file){ + try { + new File(file).createNewFile(); + BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(file)); + bos.write(content.getBytes()); + bos.close(); + } catch (FileNotFoundException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } + } + + private void writeToDisk(AxiomType<?
extends OWLAxiom> axiomType, Set<String> axioms){ + String fileName = axiomType.getName().replaceAll(" ", "_") + ".txt"; + + BufferedWriter out = null; + try { + File dir = new File("evaluation/missed"); + if(!dir.exists()){ + dir.mkdirs(); + } + + File file = new File(dir + File.separator + fileName); + if(!file.exists()){ + file.createNewFile(); + } + out = new BufferedWriter(new FileWriter(file)); + for(String axiom : axioms){ + out.write(axiom); + out.newLine(); + } + } catch (IOException e) { + e.printStackTrace(); + } finally { + if(out != null){ + try { + out.close(); + } catch (IOException e) { + e.printStackTrace(); + } + } + } + } + + protected <K, V extends Comparable<V>> List<Entry<K, V>> sortByValues(Map<K, V> map){ + List<Entry<K, V>> entries = new ArrayList<Entry<K, V>>(map.entrySet()); + Collections.sort(entries, new Comparator<Entry<K, V>>() { + + @Override + public int compare(Entry<K, V> o1, Entry<K, V> o2) { + return o2.getValue().compareTo(o1.getValue()); + } + }); + return entries; + } + + private Map<AxiomType<? extends OWLAxiom>, List<Class<? extends LearningAlgorithm>>> getAxiomTypesWithLearningAlgorithms(){ + Map<AxiomType<? extends OWLAxiom>, List<Class<? extends LearningAlgorithm>>> axiomType2Algorithm = new LinkedHashMap<AxiomType<? extends OWLAxiom>, List<Class<? extends LearningAlgorithm>>>(); + axiomType2Algorithm.put(AxiomType.SUBCLASS_OF, Arrays.asList((Class<? extends LearningAlgorithm>[])new Class[]{SimpleSubclassLearner.class}));//, CELOE.class})); +// axiomType2Algorithm.put(AxiomType.EQUIVALENT_CLASSES, Arrays.asList((Class<? extends LearningAlgorithm>[])new Class[]{CELOE.class})); + axiomType2Algorithm.put(AxiomType.DISJOINT_CLASSES, Arrays.asList((Class<? extends LearningAlgorithm>[])new Class[]{DisjointClassesLearner.class})); + + axiomType2Algorithm.put(AxiomType.SUB_OBJECT_PROPERTY, Arrays.asList((Class<? extends LearningAlgorithm>[])new Class[]{SubObjectPropertyOfAxiomLearner.class})); + axiomType2Algorithm.put(AxiomType.EQUIVALENT_OBJECT_PROPERTIES, Arrays.asList((Class<? extends LearningAlgorithm>[])new Class[]{EquivalentObjectPropertyAxiomLearner.class})); + axiomType2Algorithm.put(AxiomType.OBJECT_PROPERTY_DOMAIN, Arrays.asList((Class<? extends LearningAlgorithm>[])new Class[]{ObjectPropertyDomainAxiomLearner.class})); + axiomType2Algorithm.put(AxiomType.OBJECT_PROPERTY_RANGE, Arrays.asList((Class<? extends LearningAlgorithm>[])new Class[]{ObjectPropertyRangeAxiomLearner.class})); + axiomType2Algorithm.put(AxiomType.TRANSITIVE_OBJECT_PROPERTY, Arrays.asList((Class<? extends LearningAlgorithm>[])new Class[]{TransitiveObjectPropertyAxiomLearner.class})); + axiomType2Algorithm.put(AxiomType.FUNCTIONAL_OBJECT_PROPERTY, Arrays.asList((Class<? extends LearningAlgorithm>[])new Class[]{FunctionalObjectPropertyAxiomLearner.class})); + axiomType2Algorithm.put(AxiomType.INVERSE_FUNCTIONAL_OBJECT_PROPERTY, Arrays.asList((Class<? extends LearningAlgorithm>[])new Class[]{InverseFunctionalObjectPropertyAxiomLearner.class})); + axiomType2Algorithm.put(AxiomType.SYMMETRIC_OBJECT_PROPERTY, Arrays.asList((Class<? extends LearningAlgorithm>[])new Class[]{SymmetricObjectPropertyAxiomLearner.class})); + axiomType2Algorithm.put(AxiomType.ASYMMETRIC_OBJECT_PROPERTY, Arrays.asList((Class<? extends LearningAlgorithm>[])new Class[]{AsymmetricObjectPropertyAxiomLearner.class})); + axiomType2Algorithm.put(AxiomType.REFLEXIVE_OBJECT_PROPERTY, Arrays.asList((Class<? 
extends LearningAlgorithm>[])new Class[]{ReflexiveObjectPropertyAxiomLearner.class})); + axiomType2Algorithm.put(AxiomType.IRREFLEXIVE_OBJECT_PROPERTY, Arrays.asList((Class<? extends LearningAlgorithm>[])new Class[]{IrreflexiveObjectPropertyAxiomLearner.class})); + axiomType2Algorithm.put(AxiomType.INVERSE_OBJECT_PROPERTIES, Arrays.asList((Class<? extends LearningAlgorithm>[])new Class[]{InverseObjectPropertyAxiomLearner.class})); + + axiomType2Algorithm.put(AxiomType.SUB_DATA_PROPERTY, Arrays.asList((Class<? extends LearningAlgorithm>[])new Class[]{SubDataPropertyOfAxiomLearner.class})); + axiomType2Algorithm.put(AxiomType.EQUIVALENT_DATA_PROPERTIES, Arrays.asList((Class<? extends LearningAlgorithm>[])new Class[]{EquivalentDataPropertyAxiomLearner.class})); + axiomType2Algorithm.put(AxiomType.DATA_PROPERTY_DOMAIN, Arrays.asList((Class<? extends LearningAlgorithm>[])new Class[]{DataPropertyDomainAxiomLearner.class})); + axiomType2Algorithm.put(AxiomType.DATA_PROPERTY_RANGE, Arrays.asList((Class<? extends LearningAlgorithm>[])new Class[]{DataPropertyRangeAxiomLearner.class})); + axiomType2Algorithm.put(AxiomType.FUNCTIONAL_DATA_PROPERTY, Arrays.asList((Class<? extends LearningAlgorithm>[])new Class[]{FunctionalDataPropertyAxiomLearner.class})); + return axiomType2Algorithm; + } + + private Set<String> getRelevantAxioms(AxiomType<? extends OWLAxiom> axiomType, Set<OWLEntity> entities){ + Set<String> relevantAxioms = new HashSet<String>(); + for(OWLAxiom axiom : dbPediaOntology.getAxioms(axiomType)){ + if(!axiom.getClassesInSignature().contains(factory.getOWLThing())){ + if(isRelevantAxiom(axiom, entities)){ + String axiomString = DLLearnerAxiomConvertVisitor.getDLLearnerAxiom(axiom).toManchesterSyntaxString(baseURI, prefixes); + relevantAxioms.add(axiomString); + } + } + } + return relevantAxioms; + } + + private Set<String> getRelevantAxioms2(AxiomType<? extends OWLAxiom> axiomType, Set<OWLEntity> entities){ + Set<String> relevantAxioms = new HashSet<String>(); + if(entities.isEmpty()){ + return relevantAxioms; + } + Set<OWLAxiom> entityAxioms = new HashSet<OWLAxiom>(); + for(OWLEntity entity : entities){ + if(entity.isOWLDataProperty()){ + entityAxioms.addAll(dbPediaOntology.getAxioms((OWLDataProperty)entity)); + } else if(entity.isOWLObjectProperty()){ + entityAxioms.addAll(dbPediaOntology.getAxioms((OWLObjectProperty)entity)); + } else if(entity.isOWLClass()){ + entityAxioms.addAll(dbPediaOntology.getAxioms((OWLClass)entity)); + } + } + + for(OWLAxiom axiom : entityAxioms){ + if(axiom.getAxiomType() == axiomType && !axiom.getClassesInSignature().contains(factory.getOWLThing())){ + String axiomString = DLLearnerAxiomConvertVisitor.getDLLearnerAxiom(axiom).toManchesterSyntaxString(baseURI, prefixes); + relevantAxioms.add(axiomString); + } + } + return relevantAxioms; + } + + private boolean isRelevantAxiom(OWLAxiom axiom, Set<OWLEntity> entities){ + if(axiom instanceof OWLObjectPropertyAxiom){ + return containsOneOf(axiom.getObjectPropertiesInSignature(), entities); + } else if(axiom instanceof OWLDataPropertyAxiom){ + return containsOneOf(axiom.getDataPropertiesInSignature(), entities); + } else if(axiom instanceof OWLSubClassOfAxiom){ + return entities.contains(((OWLSubClassOfAxiom) axiom).getSubClass()); + } else if(axiom instanceof OWLDisjointClassesAxiom){ + return containsOneOf(axiom.getClassesInSignature(), entities); + } + return false; + } + + private <T> boolean containsOneOf(Collection<? extends T> c1, Collection<? 
extends T> c2){ + for(T element : c2){ + if(c1.contains(element)){ + return true; + } + } + return false; + } + + private boolean existsInDatabase(OWLAxiom ax){ + //if axiom contains owl:Thing it is trivially contained, so we can return TRUE here + if(ax.getClassesInSignature().contains(factory.getOWLThing())){ + return true; + } + try { + Axiom axiom = DLLearnerAxiomConvertVisitor.getDLLearnerAxiom(ax); + PreparedStatement ps = conn.prepareStatement("SELECT axiom FROM evaluation WHERE axiom = ?"); + ps.setString(1, axiom.toManchesterSyntaxString(baseURI, prefixes)); + ResultSet rs = ps.executeQuery(); + boolean exists = rs.next(); + ps.close(); + return exists; + } catch (SQLException e) { + e.printStackTrace(); + } + return false; + } + + private double round(double value){ + return Math.round( value * 10. ) / 10.; + } + + public String printHTMLTable() throws SQLException { + StringBuffer sb = new StringBuffer(); + Statement s = conn.createStatement(); + s.executeQuery("SELECT * FROM evaluation"); + java.sql.ResultSet rs = s.getResultSet(); + + ResultSetMetaData md = rs.getMetaData(); + int count = md.getColumnCount(); + sb.append("<table border=1>"); + sb.append("<tr>"); + for (int i = 1; i <= count; i++) { + sb.append("<th>"); + sb.append(md.getColumnLabel(i)); + sb.append("</th>"); + } + sb.append("</tr>"); + while (rs.next()) { + sb.append("<tr>"); + for (int i = 1; i <= count; i++) { + sb.append("<td>"); + sb.append(rs.getString(i)); + sb.append("</td>"); + } + sb.append("</tr>"); + } + sb.append("</table>"); + rs.close(); + s.close(); + return sb.toString(); + } + + private boolean isEntailed(EvaluatedAxiom evalAxiom){ + OWLAxiom axiom = OWLAPIConverter.getOWLAPIAxiom(evalAxiom.getAxiom()); + boolean entailed = reasoner.isEntailed(axiom); +// System.out.println(evalAxiom.getAxiom().toManchesterSyntaxString(baseURI, prefixes)); +// System.out.println(entailed); + return entailed; + } + + /** + * Loads DBpedia ontology from remote URL and initializes the reasoner. 
+ */ + private void loadDBpediaOntology(){ + try { + URL url = new URL("http://downloads.dbpedia.org/3.6/dbpedia_3.6.owl.bz2"); + InputStream is = new BufferedInputStream(url.openStream()); + CompressorInputStream in = new CompressorStreamFactory().createCompressorInputStream("bzip2", is); + dbPediaOntology = OWLManager.createOWLOntologyManager().loadOntologyFromOntologyDocument(in); + reasoner = PelletReasonerFactory.getInstance().createNonBufferingReasoner(dbPediaOntology); + reasoner.precomputeInferences(InferenceType.CLASS_HIERARCHY); + } catch (MalformedURLException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } catch (CompressorException e) { + e.printStackTrace(); + } catch (OWLOntologyCreationException e) { + e.printStackTrace(); + } + } + + private void loadCurrentDBpediaOntology(){ + int limit = 1000; + int offset = 0; + String query = "CONSTRUCT {?s ?p ?o.} WHERE {?s ?p ?o} LIMIT %d OFFSET %d"; + Model model = ModelFactory.createDefaultModel(); + + QueryExecution qExec; + Model newModel; + boolean repeat = true; + while(repeat){ + repeat = false; + qExec = QueryExecutionFactory.sparqlService("http://live.dbpedia.org/sparql", QueryFactory.create(String.format(query, limit, offset)), "http://live.dbpedia.org/ontology"); + newModel = qExec.execConstruct(); + model.add(newModel); + repeat = newModel.size() > 0; + offset += limit; + } + try { + dbPediaOntology = convert(model); + reasoner = PelletReasonerFactory.getInstance().createNonBufferingReasoner(dbPediaOntology); + reasoner.precomputeInferences(InferenceType.CLASS_HIERARCHY); + System.out.println(reasoner.getSuperClasses( + factory.getOWLClass(IRI.create("http://dbpedia.org/ontology/Actor")), false).getFlattened()); + ... [truncated message content] |
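The loadCurrentDBpediaOntology method above pages through the endpoint with LIMIT/OFFSET until a CONSTRUCT query returns an empty model. A minimal, self-contained sketch of that pagination idiom, using the same legacy Jena API as the snippet (the class name PagedConstruct and the fetchAll helper are illustrative only, not DL-Learner API):

import com.hp.hpl.jena.query.QueryExecution;
import com.hp.hpl.jena.query.QueryExecutionFactory;
import com.hp.hpl.jena.query.QueryFactory;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;

public class PagedConstruct {

    // Fetch all triples from a SPARQL endpoint page by page,
    // mirroring the loop in loadCurrentDBpediaOntology.
    public static Model fetchAll(String endpoint, String defaultGraph) {
        String template = "CONSTRUCT {?s ?p ?o.} WHERE {?s ?p ?o} LIMIT %d OFFSET %d";
        int limit = 1000;
        int offset = 0;
        Model model = ModelFactory.createDefaultModel();
        boolean repeat = true;
        while (repeat) {
            QueryExecution qExec = QueryExecutionFactory.sparqlService(
                    endpoint,
                    QueryFactory.create(String.format(template, limit, offset)),
                    defaultGraph);
            Model page = qExec.execConstruct();
            qExec.close();
            model.add(page);
            repeat = page.size() > 0; // an empty page means the endpoint is exhausted
            offset += limit;
        }
        return model;
    }

    public static void main(String[] args) {
        Model m = fetchAll("http://live.dbpedia.org/sparql", "http://live.dbpedia.org/ontology");
        System.out.println("loaded " + m.size() + " triples");
    }
}

One caveat worth noting: OFFSET-based paging over an unordered query is only reliable if the endpoint returns results in a stable order, so very large extractions can miss or duplicate triples.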
From: <lor...@us...> - 2012-10-05 13:27:54

Revision: 3854 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3854&view=rev Author: lorenz_b Date: 2012-10-05 13:27:40 +0000 (Fri, 05 Oct 2012) Log Message: ----------- Added single query mode to SPARQL enrichment algorithms. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/DisjointClassesLearner.java trunk/components-core/src/main/java/org/dllearner/algorithms/SimpleSubclassLearner.java trunk/components-core/src/main/java/org/dllearner/algorithms/properties/AsymmetricObjectPropertyAxiomLearner.java trunk/components-core/src/main/java/org/dllearner/algorithms/properties/DataPropertyDomainAxiomLearner.java trunk/components-core/src/main/java/org/dllearner/algorithms/properties/DataPropertyRangeAxiomLearner.java trunk/components-core/src/main/java/org/dllearner/algorithms/properties/DisjointDataPropertyAxiomLearner.java trunk/components-core/src/main/java/org/dllearner/algorithms/properties/DisjointObjectPropertyAxiomLearner.java trunk/components-core/src/main/java/org/dllearner/algorithms/properties/EquivalentDataPropertyAxiomLearner.java trunk/components-core/src/main/java/org/dllearner/algorithms/properties/EquivalentObjectPropertyAxiomLearner.java trunk/components-core/src/main/java/org/dllearner/algorithms/properties/FunctionalDataPropertyAxiomLearner.java trunk/components-core/src/main/java/org/dllearner/algorithms/properties/FunctionalObjectPropertyAxiomLearner.java trunk/components-core/src/main/java/org/dllearner/algorithms/properties/InverseFunctionalObjectPropertyAxiomLearner.java trunk/components-core/src/main/java/org/dllearner/algorithms/properties/InverseObjectPropertyAxiomLearner.java trunk/components-core/src/main/java/org/dllearner/algorithms/properties/IrreflexiveObjectPropertyAxiomLearner.java trunk/components-core/src/main/java/org/dllearner/algorithms/properties/ObjectPropertyDomainAxiomLearner.java trunk/components-core/src/main/java/org/dllearner/algorithms/properties/ObjectPropertyDomainAxiomLearner2.java trunk/components-core/src/main/java/org/dllearner/algorithms/properties/ObjectPropertyRangeAxiomLearner.java trunk/components-core/src/main/java/org/dllearner/algorithms/properties/ReflexiveObjectPropertyAxiomLearner.java trunk/components-core/src/main/java/org/dllearner/algorithms/properties/SubDataPropertyOfAxiomLearner.java trunk/components-core/src/main/java/org/dllearner/algorithms/properties/SubObjectPropertyOfAxiomLearner.java trunk/components-core/src/main/java/org/dllearner/algorithms/properties/SymmetricObjectPropertyAxiomLearner.java trunk/components-core/src/main/java/org/dllearner/algorithms/properties/TransitiveObjectPropertyAxiomLearner.java trunk/components-core/src/main/java/org/dllearner/core/AbstractAxiomLearningAlgorithm.java trunk/components-core/src/main/java/org/dllearner/core/owl/DisjointClassesAxiom.java trunk/components-core/src/main/java/org/dllearner/core/owl/EquivalentObjectPropertiesAxiom.java trunk/components-core/src/main/java/org/dllearner/core/owl/Thing.java trunk/components-core/src/main/java/org/dllearner/reasoning/OWLAPIReasoner.java trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java Added Paths: ----------- trunk/components-core/src/main/java/org/dllearner/core/owl/GenericDatatypePropertyAssertion.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/DisjointClassesLearner.java =================================================================== --- 
trunk/components-core/src/main/java/org/dllearner/algorithms/DisjointClassesLearner.java 2012-09-27 16:33:12 UTC (rev 3853) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/DisjointClassesLearner.java 2012-10-05 13:27:40 UTC (rev 3854) @@ -47,7 +47,6 @@ import org.dllearner.kb.SparqlEndpointKS; import org.dllearner.kb.sparql.SparqlEndpoint; import org.dllearner.learningproblems.AxiomScore; -import org.dllearner.learningproblems.Heuristics; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -157,7 +156,8 @@ } if(!forceSPARQL_1_0_Mode && ks.supportsSPARQL_1_1()){ - runSPARQL1_1_Mode(); +// runSPARQL1_1_Mode(); + runSingleQueryMode(); } else { runSPARQL1_0_Mode(); } @@ -165,6 +165,38 @@ logger.info("...finished in {}ms.", (System.currentTimeMillis()-startTime)); } + private void runSingleQueryMode(){ + //compute the overlap if exist + Map<NamedClass, Integer> class2Overlap = new HashMap<NamedClass, Integer>(); + String query = String.format("SELECT ?type (COUNT(*) AS ?cnt) WHERE {?s a <%s>. ?s a ?type.} GROUP BY ?type", classToDescribe.getName()); + ResultSet rs = executeSelectQuery(query); + QuerySolution qs; + while(rs.hasNext()){ + qs = rs.next(); + NamedClass cls = new NamedClass(qs.getResource("type").getURI()); + int cnt = qs.getLiteral("cnt").getInt(); + class2Overlap.put(cls, cnt); + } + //for each property in knowledge base + for(NamedClass cls : allClasses){ + //get the popularity + int otherPopularity = reasoner.getPopularity(cls); + if(otherPopularity == 0){//skip empty properties + continue; + } + //get the overlap + int overlap = class2Overlap.containsKey(cls) ? class2Overlap.get(cls) : 0; + //compute the estimated precision + double precision = accuracy(otherPopularity, overlap); + //compute the estimated recall + double recall = accuracy(popularity, overlap); + //compute the final score + double score = 1 - fMEasure(precision, recall); + + currentlyBestEvaluatedDescriptions.add(new EvaluatedDescription(cls, new AxiomScore(score))); + } + } + private void runSPARQL1_0_Mode(){ Model model = ModelFactory.createDefaultModel(); int limit = 1000; Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/SimpleSubclassLearner.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/SimpleSubclassLearner.java 2012-09-27 16:33:12 UTC (rev 3853) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/SimpleSubclassLearner.java 2012-10-05 13:27:40 UTC (rev 3854) @@ -19,9 +19,7 @@ package org.dllearner.algorithms; -import java.net.URL; import java.util.ArrayList; -import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -109,11 +107,11 @@ @Override public List<EvaluatedAxiom> getCurrentlyBestEvaluatedAxioms(int nrOfAxioms) { - List<EvaluatedAxiom> axioms = new ArrayList<EvaluatedAxiom>(); + currentlyBestAxioms = new ArrayList<EvaluatedAxiom>(); for(EvaluatedDescription ed : getCurrentlyBestEvaluatedDescriptions(nrOfAxioms)){ - axioms.add(new EvaluatedAxiom(new SubClassAxiom(classToDescribe, ed.getDescription()), new AxiomScore(ed.getAccuracy()))); + currentlyBestAxioms.add(new EvaluatedAxiom(new SubClassAxiom(classToDescribe, ed.getDescription()), new AxiomScore(ed.getAccuracy()))); } - return axioms; + return currentlyBestAxioms; } @Override @@ -145,6 +143,16 @@ } } + if(!forceSPARQL_1_0_Mode && ks.supportsSPARQL_1_1()){ + runSingleQueryMode(); + } else { + runSPARQL1_0_Mode(); + } + + logger.info("...finished in 
{}ms. (Got {} rows)", (System.currentTimeMillis()-startTime), fetchedRows); + } + + private void runSPARQL1_0_Mode(){ Map<Individual, SortedSet<Description>> ind2Types = new HashMap<Individual, SortedSet<Description>>(); int limit = 1000; boolean repeat = true; @@ -153,9 +161,26 @@ createEvaluatedDescriptions(ind2Types); fetchedRows += 1000; } - + } + + private void runSingleQueryMode(){ + int total = reasoner.getPopularity(classToDescribe); - logger.info("...finished in {}ms. (Got {} rows)", (System.currentTimeMillis()-startTime), fetchedRows); + if(total > 0){ + String query = String.format("SELECT ?type (COUNT(DISTINCT ?s) AS ?cnt) WHERE {?s a <%s>. ?s a ?type} GROUP BY ?type ORDER BY DESC(?cnt)", classToDescribe.getName()); + ResultSet rs = executeSelectQuery(query); + QuerySolution qs; + while(rs.hasNext()){ + qs = rs.next(); + if(!qs.get("type").isAnon()){ + NamedClass sup = new NamedClass(qs.getResource("type").getURI()); + int overlap = qs.get("cnt").asLiteral().getInt(); + if(!sup.getURI().equals(Thing.uri) && ! classToDescribe.equals(sup)){//omit owl:Thing and the class to describe itself + currentlyBestEvaluatedDescriptions.add(new EvaluatedDescription(sup, computeScore(total, overlap))); + } + } + } + } } public NamedClass getClassToDescribe() { @@ -234,8 +259,7 @@ } public static void main(String[] args) throws Exception{ - SparqlEndpointKS ks = new SparqlEndpointKS(new SparqlEndpoint(new URL("http://dbpedia.aksw.org:8902/sparql"), - Collections.singletonList("http://dbpedia.org"), Collections.<String>emptyList())); + SparqlEndpointKS ks = new SparqlEndpointKS(SparqlEndpoint.getEndpointDBpediaLiveAKSW()); SPARQLReasoner reasoner = new SPARQLReasoner(ks); reasoner.prepareSubsumptionHierarchy(); @@ -244,7 +268,7 @@ l.setReasoner(reasoner); l.setReturnOnlyNewAxioms(true); - ConfigHelper.configure(l, "maxExecutionTimeInSeconds", 10); + ConfigHelper.configure(l, "maxExecutionTimeInSeconds", 50); l.setClassToDescribe(new NamedClass("http://dbpedia.org/ontology/SoccerClub")); l.init(); l.start(); Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/properties/AsymmetricObjectPropertyAxiomLearner.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/properties/AsymmetricObjectPropertyAxiomLearner.java 2012-09-27 16:33:12 UTC (rev 3853) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/properties/AsymmetricObjectPropertyAxiomLearner.java 2012-10-05 13:27:40 UTC (rev 3854) @@ -22,6 +22,8 @@ import java.net.URL; import java.util.ArrayList; import java.util.Collections; +import java.util.SortedSet; +import java.util.TreeSet; import org.dllearner.core.AbstractAxiomLearningAlgorithm; import org.dllearner.core.ComponentAnn; @@ -29,12 +31,15 @@ import org.dllearner.core.config.ConfigOption; import org.dllearner.core.config.ObjectPropertyEditor; import org.dllearner.core.owl.AsymmetricObjectPropertyAxiom; +import org.dllearner.core.owl.Individual; +import org.dllearner.core.owl.KBElement; import org.dllearner.core.owl.ObjectProperty; import org.dllearner.kb.SparqlEndpointKS; import org.dllearner.kb.sparql.SparqlEndpoint; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.hp.hpl.jena.query.ParameterizedSparqlString; import com.hp.hpl.jena.query.QuerySolution; import com.hp.hpl.jena.query.ResultSet; import com.hp.hpl.jena.rdf.model.Model; @@ -53,6 +58,9 @@ public AsymmetricObjectPropertyAxiomLearner(SparqlEndpointKS ks){ this.ks = ks; + 
super.posExamplesQueryTemplate = new ParameterizedSparqlString("SELECT DISTINCT ?s WHERE {?s ?p ?o. FILTER NOT EXISTS{?o ?p ?s}}"); + super.negExamplesQueryTemplate = new ParameterizedSparqlString("SELECT DISTINCT ?s WHERE {?s ?p ?o. ?o ?p ?s}"); + } public ObjectProperty getPropertyToDescribe() { @@ -88,18 +96,18 @@ } private void runSPARQL1_0_Mode(){ - Model model = ModelFactory.createDefaultModel(); + workingModel = ModelFactory.createDefaultModel(); int limit = 1000; int offset = 0; String baseQuery = "CONSTRUCT {?s <%s> ?o.} WHERE {?s <%s> ?o} LIMIT %d OFFSET %d"; String query = String.format(baseQuery, propertyToDescribe.getName(), propertyToDescribe.getName(), limit, offset); Model newModel = executeConstructQuery(query); while(!terminationCriteriaSatisfied() && newModel.size() != 0){ - model.add(newModel); + workingModel.add(newModel); // get number of instances of s with <s p o> query = "SELECT (COUNT(*) AS ?total) WHERE {?s <%s> ?o.}"; query = query.replace("%s", propertyToDescribe.getURI().toString()); - ResultSet rs = executeSelectQuery(query, model); + ResultSet rs = executeSelectQuery(query, workingModel); QuerySolution qs; int total = 0; while(rs.hasNext()){ @@ -108,7 +116,7 @@ } query = "SELECT (COUNT(*) AS ?symmetric) WHERE {?s <%s> ?o. ?o <%s> ?s.}"; query = query.replace("%s", propertyToDescribe.getURI().toString()); - rs = executeSelectQuery(query, model); + rs = executeSelectQuery(query, workingModel); int symmetric = 0; while(rs.hasNext()){ qs = rs.next(); @@ -127,26 +135,57 @@ } } - private void runSPARQL1_1_Mode(){ - String query = "SELECT (COUNT(*) AS ?total) WHERE {?s <%s> ?o.}"; - query = query.replace("%s", propertyToDescribe.getURI().toString()); - ResultSet rs = executeSelectQuery(query); - QuerySolution qs; - int total = 0; - while(rs.hasNext()){ - qs = rs.next(); - total = qs.getLiteral("total").getInt(); + @Override + public SortedSet<KBElement> getPositiveExamples(EvaluatedAxiom axiom) { + if(workingModel != null){ + SortedSet<KBElement> allExamples = new TreeSet<KBElement>(); + ParameterizedSparqlString query = new ParameterizedSparqlString("SELECT DISTINCT ?s WHERE {?s ?p ?o.}"); + query.setIri("p", propertyToDescribe.getName()); + ResultSet rs = executeSelectQuery(query.toString(), workingModel); + while(rs.hasNext()){ + allExamples.add(new Individual(rs.next().get("s").asResource().getURI())); + } + SortedSet<KBElement> negExamples = getNegativeExamples(axiom); + + SortedSet<KBElement> posExamples = new TreeSet<KBElement>(allExamples); + posExamples.removeAll(negExamples); + + + return posExamples; + } else { + throw new UnsupportedOperationException("Getting positive examples is not possible."); } - query = "SELECT (COUNT(*) AS ?symmetric) WHERE {?s <%s> ?o. 
?o <%s> ?s.}"; - query = query.replace("%s", propertyToDescribe.getURI().toString()); - rs = executeSelectQuery(query); - int symmetric = 0; - while(rs.hasNext()){ - qs = rs.next(); - symmetric = qs.getLiteral("symmetric").getInt(); + } + + @Override + public SortedSet<KBElement> getNegativeExamples(EvaluatedAxiom axiom) { + if(workingModel != null){ + SortedSet<KBElement> negExamples = new TreeSet<KBElement>(); + ParameterizedSparqlString query = new ParameterizedSparqlString("SELECT DISTINCT ?s WHERE {?s ?p ?o.?o ?p ?s}"); + query.setIri("p", propertyToDescribe.getName()); + ResultSet rs = executeSelectQuery(query.toString(), workingModel); + while(rs.hasNext()){ + negExamples.add(new Individual(rs.next().get("s").asResource().getURI())); + } + + return negExamples; + } else { + throw new UnsupportedOperationException("Getting positive examples is not possible."); } - int asymmetric = total - symmetric; + } + + private void runSPARQL1_1_Mode(){ + int total = reasoner.getPopularity(propertyToDescribe); + if(total > 0){ + int asymmetric = 0; + String query = "SELECT (COUNT(*) AS ?asymmetric) WHERE {?s <%s> ?o. FILTER NOT EXISTS{?o <%s> ?s.}}"; + query = query.replace("%s", propertyToDescribe.getURI().toString()); + ResultSet rs = executeSelectQuery(query); + if(rs.hasNext()){ + asymmetric = rs.next().getLiteral("asymmetric").getInt(); + } + currentlyBestAxioms.add(new EvaluatedAxiom(new AsymmetricObjectPropertyAxiom(propertyToDescribe), computeScore(total, asymmetric), declaredAsymmetric)); } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/properties/DataPropertyDomainAxiomLearner.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/properties/DataPropertyDomainAxiomLearner.java 2012-09-27 16:33:12 UTC (rev 3853) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/properties/DataPropertyDomainAxiomLearner.java 2012-10-05 13:27:40 UTC (rev 3854) @@ -20,14 +20,7 @@ package org.dllearner.algorithms.properties; import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; import java.util.Set; -import java.util.SortedSet; -import java.util.TreeSet; import org.apache.log4j.ConsoleAppender; import org.apache.log4j.Level; @@ -40,19 +33,26 @@ import org.dllearner.core.owl.DatatypeProperty; import org.dllearner.core.owl.DatatypePropertyDomainAxiom; import org.dllearner.core.owl.Description; -import org.dllearner.core.owl.Individual; +import org.dllearner.core.owl.KBElement; import org.dllearner.core.owl.NamedClass; import org.dllearner.core.owl.Thing; import org.dllearner.kb.SparqlEndpointKS; import org.dllearner.kb.sparql.SparqlEndpoint; import org.dllearner.reasoning.SPARQLReasoner; -import org.semanticweb.owlapi.model.IRI; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.hp.hpl.jena.query.ParameterizedSparqlString; import com.hp.hpl.jena.query.QuerySolution; import com.hp.hpl.jena.query.ResultSet; +import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.rdf.model.ModelFactory; +import com.hp.hpl.jena.rdf.model.RDFNode; +import com.hp.hpl.jena.rdf.model.Resource; +import com.hp.hpl.jena.rdf.model.Statement; +import com.hp.hpl.jena.rdf.model.StmtIterator; +import com.hp.hpl.jena.vocabulary.OWL; +import com.hp.hpl.jena.vocabulary.RDF; @ComponentAnn(name="dataproperty domain axiom learner", shortName="dpldomain", version=0.1) public class 
DataPropertyDomainAxiomLearner extends AbstractAxiomLearningAlgorithm { @@ -62,14 +62,10 @@ @ConfigOption(name="propertyToDescribe", description="", propertyEditorClass=DataPropertyEditor.class) private DatatypeProperty propertyToDescribe; - private static final ParameterizedSparqlString singleQueryTemplate = new ParameterizedSparqlString("SELECT ?type (COUNT(DISTINCT ?ind) AS ?cnt) WHERE {?ind <%s> ?o. ?ind a ?type.}"); - - private Map<Individual, SortedSet<Description>> individual2Types; - public DataPropertyDomainAxiomLearner(SparqlEndpointKS ks){ this.ks = ks; - super.iterativeQueryTemplate = new ParameterizedSparqlString("SELECT DISTINCT ?ind ?type WHERE {?ind ?p ?o. ?ind a ?type.}"); - + super.posExamplesQueryTemplate = new ParameterizedSparqlString("SELECT DISTINCT ?s WHERE {?s a ?type}"); + super.negExamplesQueryTemplate = new ParameterizedSparqlString("SELECT DISTINCT ?s WHERE {?s ?p ?o. FILTER NOT EXISTS{?s a ?type}}"); } public DatatypeProperty getPropertyToDescribe() { @@ -82,7 +78,6 @@ @Override public void start() { - iterativeQueryTemplate.setIri("p", propertyToDescribe.getName()); logger.info("Start learning..."); startTime = System.currentTimeMillis(); fetchedRows = 0; @@ -104,81 +99,108 @@ } } } - - runIterativeQueryMode(); + if(!forceSPARQL_1_0_Mode && ks.supportsSPARQL_1_1()){ + runSingleQueryMode(); + } else { + runSPARQL1_0_Mode(); + } logger.info("...finished in {}ms.", (System.currentTimeMillis()-startTime)); } private void runSingleQueryMode(){ - } - - private void runIterativeQueryMode(){ - individual2Types = new HashMap<Individual, SortedSet<Description>>(); - while(!terminationCriteriaSatisfied() && !fullDataLoaded){ - ResultSet rs = fetchData(); - processData(rs); - buildEvaluatedAxioms(); + String query = String.format("SELECT (COUNT(DISTINCT ?s) AS ?cnt) WHERE {?s <%s> ?o.}", propertyToDescribe.getName()); + ResultSet rs = executeSelectQuery(query); + int nrOfSubjects = rs.next().getLiteral("cnt").getInt(); + + query = String.format("SELECT ?type (COUNT(DISTINCT ?s) AS ?cnt) WHERE {?s <%s> ?o. ?s a ?type.} GROUP BY ?type", propertyToDescribe.getName()); + rs = executeSelectQuery(query); + QuerySolution qs; + while(rs.hasNext()){ + qs = rs.next(); + NamedClass domain = new NamedClass(qs.getResource("type").getURI()); + int cnt = qs.getLiteral("cnt").getInt(); + if(!domain.getURI().equals(Thing.uri)){ + currentlyBestAxioms.add(new EvaluatedAxiom(new DatatypePropertyDomainAxiom(propertyToDescribe, domain), computeScore(nrOfSubjects, cnt))); + } } } - private void processData(ResultSet rs){ - QuerySolution qs; - Individual ind; - Description type; - SortedSet<Description> types; - int cnt = 0; - while(rs.hasNext()){ - cnt++; - qs = rs.next(); - if(qs.get("type").isURIResource()){ - types = new TreeSet<Description>(); - ind = new Individual(qs.getResource("ind").getURI()); - type = new NamedClass(qs.getResource("type").getURI()); - types.add(type); - if(reasoner.isPrepared()){ - if(reasoner.getClassHierarchy().contains(type)){ - types.addAll(reasoner.getClassHierarchy().getSuperClasses(type)); + private void runSPARQL1_0_Mode() { + workingModel = ModelFactory.createDefaultModel(); + int limit = 1000; + int offset = 0; + String baseQuery = "CONSTRUCT {?s a ?type.} WHERE {?s <%s> ?o. 
?s a ?type.} LIMIT %d OFFSET %d"; + String query = String.format(baseQuery, propertyToDescribe.getName(), limit, offset); + Model newModel = executeConstructQuery(query); + while(!terminationCriteriaSatisfied() && newModel.size() != 0){ + workingModel.add(newModel); + // get number of distinct subjects + query = "SELECT (COUNT(DISTINCT ?s) AS ?all) WHERE {?s a ?type.}"; + ResultSet rs = executeSelectQuery(query, workingModel); + QuerySolution qs; + int all = 1; + while (rs.hasNext()) { + qs = rs.next(); + all = qs.getLiteral("all").getInt(); + } + + // get class and number of instances + query = "SELECT ?type (COUNT(DISTINCT ?s) AS ?cnt) WHERE {?s a ?type.} GROUP BY ?type ORDER BY DESC(?cnt)"; + rs = executeSelectQuery(query, workingModel); + + if (all > 0) { + currentlyBestAxioms.clear(); + while(rs.hasNext()){ + qs = rs.next(); + Resource type = qs.get("type").asResource(); + //omit owl:Thing as trivial domain + if(type.equals(OWL.Thing)){ + continue; } + currentlyBestAxioms.add(new EvaluatedAxiom( + new DatatypePropertyDomainAxiom(propertyToDescribe, new NamedClass(type.getURI())), + computeScore(all, qs.get("cnt").asLiteral().getInt()))); } - addToMap(individual2Types, ind, types); + } + offset += limit; + query = String.format(baseQuery, propertyToDescribe.getName(), limit, offset); + newModel = executeConstructQuery(query); + fillWithInference(newModel); } - lastRowCount = cnt; } - - private void buildEvaluatedAxioms(){ - List<EvaluatedAxiom> axioms = new ArrayList<EvaluatedAxiom>(); - Map<Description, Integer> result = new HashMap<Description, Integer>(); - for(Entry<Individual, SortedSet<Description>> entry : individual2Types.entrySet()){ - for(Description nc : entry.getValue()){ - Integer cnt = result.get(nc); - if(cnt == null){ - cnt = Integer.valueOf(1); - } else { - cnt = Integer.valueOf(cnt + 1); + + private void fillWithInference(Model model){ + Model additionalModel = ModelFactory.createDefaultModel(); + if(reasoner.isPrepared()){ + for(StmtIterator iter = model.listStatements(null, RDF.type, (RDFNode)null); iter.hasNext();){ + Statement st = iter.next(); + Description cls = new NamedClass(st.getObject().asResource().getURI()); + if(reasoner.getClassHierarchy().contains(cls)){ + for(Description sup : reasoner.getClassHierarchy().getSuperClasses(cls)){ + additionalModel.add(st.getSubject(), st.getPredicate(), model.createResource(sup.toString())); + } } - result.put(nc, cnt); } } - - //omit owl:Thing - result.remove(new NamedClass(Thing.instance.getURI())); - - EvaluatedAxiom evalAxiom; - int total = individual2Types.keySet().size(); - for(Entry<Description, Integer> entry : sortByValues(result)){ - evalAxiom = new EvaluatedAxiom(new DatatypePropertyDomainAxiom(propertyToDescribe, entry.getKey()), - computeScore(total, entry.getValue())); - if(existingAxioms.contains(evalAxiom.getAxiom())){ - evalAxiom.setAsserted(true); - } - axioms.add(evalAxiom); - } - - currentlyBestAxioms = axioms; + model.add(additionalModel); } + @Override + public Set<KBElement> getPositiveExamples(EvaluatedAxiom evAxiom) { + DatatypePropertyDomainAxiom axiom = (DatatypePropertyDomainAxiom) evAxiom.getAxiom(); + posExamplesQueryTemplate.setIri("type", axiom.getDomain().toString()); + return super.getPositiveExamples(evAxiom); + } + + @Override + public Set<KBElement> getNegativeExamples(EvaluatedAxiom evAxiom) { + DatatypePropertyDomainAxiom axiom = (DatatypePropertyDomainAxiom) evAxiom.getAxiom(); + negExamplesQueryTemplate.setIri("type", axiom.getDomain().toString()); + return 
super.getNegativeExamples(evAxiom); + } + public static void main(String[] args) throws Exception{ org.apache.log4j.Logger.getRootLogger().addAppender(new ConsoleAppender(new SimpleLayout())); org.apache.log4j.Logger.getRootLogger().setLevel(Level.INFO); Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/properties/DataPropertyRangeAxiomLearner.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/properties/DataPropertyRangeAxiomLearner.java 2012-09-27 16:33:12 UTC (rev 3853) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/properties/DataPropertyRangeAxiomLearner.java 2012-10-05 13:27:40 UTC (rev 3854) @@ -20,12 +20,7 @@ package org.dllearner.algorithms.properties; import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.SortedSet; -import java.util.TreeSet; +import java.util.Set; import org.dllearner.core.AbstractAxiomLearningAlgorithm; import org.dllearner.core.ComponentAnn; @@ -36,15 +31,19 @@ import org.dllearner.core.owl.Datatype; import org.dllearner.core.owl.DatatypeProperty; import org.dllearner.core.owl.DatatypePropertyRangeAxiom; -import org.dllearner.core.owl.Individual; +import org.dllearner.core.owl.KBElement; import org.dllearner.kb.SparqlEndpointKS; import org.dllearner.kb.sparql.SparqlEndpoint; import org.dllearner.reasoning.SPARQLReasoner; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.hp.hpl.jena.query.ParameterizedSparqlString; import com.hp.hpl.jena.query.QuerySolution; import com.hp.hpl.jena.query.ResultSet; +import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.rdf.model.ModelFactory; +import com.hp.hpl.jena.rdf.model.Resource; @ComponentAnn(name="dataproperty range learner", shortName="dblrange", version=0.1) public class DataPropertyRangeAxiomLearner extends AbstractAxiomLearningAlgorithm { @@ -56,6 +55,9 @@ public DataPropertyRangeAxiomLearner(SparqlEndpointKS ks){ this.ks = ks; + super.posExamplesQueryTemplate = new ParameterizedSparqlString("SELECT ?s WHERE {?o ?p ?s. FILTER (DATATYPE(?s) = ?dt)}"); + super.negExamplesQueryTemplate = new ParameterizedSparqlString("SELECT ?s WHERE {?o ?p ?s. 
FILTER (DATATYPE(?s) != ?dt)}"); + } public DatatypeProperty getPropertyToDescribe() { @@ -72,84 +74,103 @@ startTime = System.currentTimeMillis(); fetchedRows = 0; currentlyBestAxioms = new ArrayList<EvaluatedAxiom>(); - //get existing range - DataRange existingRange = reasoner.getRange(propertyToDescribe); - if(existingRange != null){ - existingAxioms.add(new DatatypePropertyRangeAxiom(propertyToDescribe, existingRange)); - logger.debug("Existing range: " + existingRange); - } - //get objects with datatypes - Map<Individual, SortedSet<Datatype>> individual2Datatypes = new HashMap<Individual, SortedSet<Datatype>>(); - boolean repeat = true; - int limit = 1000; - while(!terminationCriteriaSatisfied() && repeat){ - int ret = addIndividualsWithTypes(individual2Datatypes, limit, fetchedRows); - currentlyBestAxioms = buildEvaluatedAxioms(individual2Datatypes); - fetchedRows += 1000; - repeat = (ret == limit); - } - logger.info("...finished in {}ms.", (System.currentTimeMillis()-startTime)); - } - - private List<EvaluatedAxiom> buildEvaluatedAxioms(Map<Individual, SortedSet<Datatype>> individual2Types){ - List<EvaluatedAxiom> axioms = new ArrayList<EvaluatedAxiom>(); - Map<Datatype, Integer> result = new HashMap<Datatype, Integer>(); - for(Entry<Individual, SortedSet<Datatype>> entry : individual2Types.entrySet()){ - for(Datatype nc : entry.getValue()){ - Integer cnt = result.get(nc); - if(cnt == null){ - cnt = Integer.valueOf(1); - } else { - cnt = Integer.valueOf(cnt + 1); - } - result.put(nc, cnt); + if(returnOnlyNewAxioms){ + //get existing ranges + DataRange existingRange = reasoner.getRange(propertyToDescribe); + if(existingRange != null){ + existingAxioms.add(new DatatypePropertyRangeAxiom(propertyToDescribe, existingRange)); } } - EvaluatedAxiom evalAxiom; - int total = individual2Types.keySet().size(); - for(Entry<Datatype, Integer> entry : sortByValues(result)){ - evalAxiom = new EvaluatedAxiom(new DatatypePropertyRangeAxiom(propertyToDescribe, entry.getKey()), - computeScore(total, entry.getValue())); - if(existingAxioms.contains(evalAxiom.getAxiom())){ - evalAxiom.setAsserted(true); - } - axioms.add(evalAxiom); + if(!forceSPARQL_1_0_Mode && ks.supportsSPARQL_1_1()){ + runSingleQueryMode(); + } else { + runSPARQL1_0_Mode(); } - - return axioms; + logger.info("...finished in {}ms.", (System.currentTimeMillis()-startTime)); } - - private int addIndividualsWithTypes(Map<Individual, SortedSet<Datatype>> ind2Datatypes, int limit, int offset){ - String query = String.format("SELECT ?ind (DATATYPE(?val) AS ?datatype) WHERE {?ind <%s> ?val.} LIMIT %d OFFSET %d", propertyToDescribe.getName(), limit, offset); + private void runSingleQueryMode(){ + String query = String.format("SELECT (COUNT(DISTINCT ?o) AS ?cnt) WHERE {?s <%s> ?o.}", propertyToDescribe.getName()); ResultSet rs = executeSelectQuery(query); - Individual ind; - Datatype newType; + int nrOfSubjects = rs.next().getLiteral("cnt").getInt(); + + query = String.format("SELECT (DATATYPE(?o) AS ?type) (COUNT(DISTINCT ?o) AS ?cnt) WHERE {?s <%s> ?o.} GROUP BY DATATYPE(?o)", propertyToDescribe.getName()); + rs = executeSelectQuery(query); QuerySolution qs; - SortedSet<Datatype> types; - int cnt = 0; while(rs.hasNext()){ - cnt++; - newType = null; qs = rs.next(); - ind = new Individual(qs.getResource("ind").getURI()); - if(qs.getResource("datatype") != null){ - newType = new Datatype(qs.getResource("datatype").getURI()); - types = ind2Datatypes.get(ind); - if(types == null){ - types = new TreeSet<Datatype>(); - ind2Datatypes.put(ind, 
types); + if(qs.get("type") != null){ + DataRange range = new Datatype(qs.get("type").asLiteral().getLexicalForm()); + int cnt = qs.getLiteral("cnt").getInt(); + currentlyBestAxioms.add(new EvaluatedAxiom(new DatatypePropertyRangeAxiom(propertyToDescribe, range), computeScore(nrOfSubjects, cnt))); + } + } + } + + private void runSPARQL1_0_Mode() { + workingModel = ModelFactory.createDefaultModel(); + int limit = 1000; + int offset = 0; + String baseQuery = "CONSTRUCT {?s <%s> ?o} WHERE {?s <%s> ?o.} LIMIT %d OFFSET %d"; + String query = String.format(baseQuery, propertyToDescribe.getName(), propertyToDescribe.getName(), limit, offset); + Model newModel = executeConstructQuery(query); + while(!terminationCriteriaSatisfied() && newModel.size() != 0){ + workingModel.add(newModel); + // get number of distinct subjects + query = "SELECT (COUNT(?o) AS ?all) WHERE {?s ?p ?o.}"; + ResultSet rs = executeSelectQuery(query, workingModel); + QuerySolution qs; + int all = 1; + while (rs.hasNext()) { + qs = rs.next(); + all = qs.getLiteral("all").getInt(); + } + + // get class and number of instances +// query = "SELECT (DATATYPE(?o) AS ?dt) (COUNT(?o) AS ?cnt) WHERE{?s ?p ?o} GROUP BY DATATYPE(?o) ORDER BY DESC(?cnt)"; + query = "SELECT ?dt (COUNT(?o) AS ?cnt) " + + "WHERE {" + + "{" + + "SELECT (DATATYPE(?o) AS ?dt) ?o WHERE{?s ?p ?o}" + + "}" + + "}" + + "GROUP BY ?dt"; + rs = executeSelectQuery(query, workingModel); + + if (all > 0) { + currentlyBestAxioms.clear(); + while(rs.hasNext()){ + qs = rs.next(); + Resource type = qs.get("dt").asResource(); + currentlyBestAxioms.add(new EvaluatedAxiom( + new DatatypePropertyRangeAxiom(propertyToDescribe, new Datatype(type.getURI())), + computeScore(all, qs.get("cnt").asLiteral().getInt()))); } - types.add(newType); + } - + offset += limit; + query = String.format(baseQuery, propertyToDescribe.getName(), propertyToDescribe.getName(), limit, offset); + newModel = executeConstructQuery(query); } - return cnt; } + @Override + public Set<KBElement> getPositiveExamples(EvaluatedAxiom evAxiom) { + DatatypePropertyRangeAxiom axiom = (DatatypePropertyRangeAxiom) evAxiom.getAxiom(); + posExamplesQueryTemplate.setIri("dt", axiom.getRange().toString()); + return super.getPositiveExamples(evAxiom); + } + + @Override + public Set<KBElement> getNegativeExamples(EvaluatedAxiom evAxiom) { + DatatypePropertyRangeAxiom axiom = (DatatypePropertyRangeAxiom) evAxiom.getAxiom(); + negExamplesQueryTemplate.setIri("dt", axiom.getRange().toString()); + return super.getNegativeExamples(evAxiom); + } + public static void main(String[] args) throws Exception{ SparqlEndpointKS ks = new SparqlEndpointKS(SparqlEndpoint.getEndpointDBpediaLiveAKSW()); Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/properties/DisjointDataPropertyAxiomLearner.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/properties/DisjointDataPropertyAxiomLearner.java 2012-09-27 16:33:12 UTC (rev 3853) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/properties/DisjointDataPropertyAxiomLearner.java 2012-10-05 13:27:40 UTC (rev 3854) @@ -21,6 +21,7 @@ import java.util.ArrayList; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -32,8 +33,13 @@ import org.dllearner.core.EvaluatedAxiom; import org.dllearner.core.config.ConfigOption; import org.dllearner.core.config.DataPropertyEditor; +import 
org.dllearner.core.owl.Datatype; import org.dllearner.core.owl.DatatypeProperty; import org.dllearner.core.owl.DisjointDatatypePropertyAxiom; +import org.dllearner.core.owl.GenericDatatypePropertyAssertion; +import org.dllearner.core.owl.Individual; +import org.dllearner.core.owl.KBElement; +import org.dllearner.core.owl.ObjectProperty; import org.dllearner.kb.LocalModelBasedSparqlEndpointKS; import org.dllearner.kb.SparqlEndpointKS; import org.dllearner.kb.sparql.SPARQLTasks; @@ -42,8 +48,10 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.hp.hpl.jena.query.ParameterizedSparqlString; import com.hp.hpl.jena.query.QuerySolution; import com.hp.hpl.jena.query.ResultSet; +import com.hp.hpl.jena.rdf.model.Literal; import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.ModelFactory; import com.hp.hpl.jena.rdf.model.RDFNode; @@ -64,6 +72,9 @@ public DisjointDataPropertyAxiomLearner(SparqlEndpointKS ks){ this.ks = ks; + + super.posExamplesQueryTemplate = new ParameterizedSparqlString("SELECT DISTINCT ?s ?o WHERE {?s ?p1 ?o. FILTER NOT EXISTS{?s ?p ?o}}"); + super.negExamplesQueryTemplate = new ParameterizedSparqlString("SELECT DISTINCT ?s ?o WHERE {?s ?p ?o. }"); } public DatatypeProperty getPropertyToDescribe() { @@ -94,7 +105,8 @@ allDataProperties.remove(propertyToDescribe); if(!forceSPARQL_1_0_Mode && ks.supportsSPARQL_1_1()){ - runSPARQL1_1_Mode(); +// runSPARQL1_1_Mode(); + runSingleQueryMode(); } else { runSPARQL1_0_Mode(); } @@ -102,21 +114,52 @@ logger.info("...finished in {}ms.", (System.currentTimeMillis()-startTime)); } + private void runSingleQueryMode(){ + //compute the overlap if exist + Map<ObjectProperty, Integer> property2Overlap = new HashMap<ObjectProperty, Integer>(); + String query = String.format("SELECT ?p (COUNT(*) AS ?cnt) WHERE {?s <%s> ?o. ?s ?p ?o.} GROUP BY ?p", propertyToDescribe.getName()); + ResultSet rs = executeSelectQuery(query); + QuerySolution qs; + while(rs.hasNext()){ + qs = rs.next(); + ObjectProperty prop = new ObjectProperty(qs.getResource("p").getURI()); + int cnt = qs.getLiteral("cnt").getInt(); + property2Overlap.put(prop, cnt); + } + //for each property in knowledge base + for(DatatypeProperty p : allDataProperties){ + //get the popularity + int otherPopularity = reasoner.getPopularity(p); + if(otherPopularity == 0){//skip empty properties + continue; + } + //get the overlap + int overlap = property2Overlap.containsKey(p) ? property2Overlap.get(p) : 0; + //compute the estimated precision + double precision = accuracy(otherPopularity, overlap); + //compute the estimated recall + double recall = accuracy(popularity, overlap); + //compute the final score + double score = 1 - fMEasure(precision, recall); + + currentlyBestAxioms.add(new EvaluatedAxiom(new DisjointDatatypePropertyAxiom(propertyToDescribe, p), new AxiomScore(score))); + } + } + private void runSPARQL1_0_Mode() { - Model model = ModelFactory.createDefaultModel(); + workingModel = ModelFactory.createDefaultModel(); int limit = 1000; int offset = 0; String baseQuery = "CONSTRUCT {?s ?p ?o.} WHERE {?s <%s> ?o. 
?s ?p ?o.} LIMIT %d OFFSET %d"; + String countQuery = "SELECT ?p (COUNT(?s) AS ?count) WHERE {?s ?p ?o.} GROUP BY ?p"; String query = String.format(baseQuery, propertyToDescribe.getName(), limit, offset); Model newModel = executeConstructQuery(query); Map<DatatypeProperty, Integer> result = new HashMap<DatatypeProperty, Integer>(); while(!terminationCriteriaSatisfied() && newModel.size() != 0){ - model.add(newModel); - query = "SELECT ?p (COUNT(?s) AS ?count) WHERE {?s ?p ?o.} GROUP BY ?p"; - + workingModel.add(newModel); DatatypeProperty prop; Integer oldCnt; - ResultSet rs = executeSelectQuery(query, model); + ResultSet rs = executeSelectQuery(countQuery, workingModel); QuerySolution qs; while(rs.hasNext()){ qs = rs.next(); @@ -135,7 +178,7 @@ offset += limit; - query = String.format(baseQuery, propertyToDescribe.getName(), propertyToDescribe.getName(), limit, offset); + query = String.format(baseQuery, propertyToDescribe.getName(), limit, offset); newModel = executeConstructQuery(query); } @@ -253,6 +296,56 @@ return axioms; } + @Override + public Set<KBElement> getPositiveExamples(EvaluatedAxiom evAxiom) { + DisjointDatatypePropertyAxiom axiom = (DisjointDatatypePropertyAxiom) evAxiom.getAxiom(); + posExamplesQueryTemplate.setIri("p", axiom.getDisjointRole().getName()); + if(workingModel != null){ + Set<KBElement> posExamples = new HashSet<KBElement>(); + + ResultSet rs = executeSelectQuery(posExamplesQueryTemplate.toString(), workingModel); + Individual subject; + Literal object; + QuerySolution qs; + while(rs.hasNext()){ + qs = rs.next(); + subject = new Individual(qs.getResource("s").getURI()); + object = qs.getLiteral("o"); + posExamples.add(new GenericDatatypePropertyAssertion( + propertyToDescribe, subject, object.getLexicalForm(), new Datatype(object.getDatatypeURI()))); + } + + return posExamples; + } else { + throw new UnsupportedOperationException("Getting positive examples is not possible."); + } + } + + @Override + public Set<KBElement> getNegativeExamples(EvaluatedAxiom evAxiom) { + DisjointDatatypePropertyAxiom axiom = (DisjointDatatypePropertyAxiom) evAxiom.getAxiom(); + negExamplesQueryTemplate.setIri("p", axiom.getDisjointRole().getName()); + if(workingModel != null){ + Set<KBElement> negExamples = new TreeSet<KBElement>(); + + ResultSet rs = executeSelectQuery(negExamplesQueryTemplate.toString(), workingModel); + Individual subject; + Literal object; + QuerySolution qs; + while(rs.hasNext()){ + qs = rs.next(); + subject = new Individual(qs.getResource("s").getURI()); + object = qs.getLiteral("o"); + negExamples.add(new GenericDatatypePropertyAssertion( + propertyToDescribe, subject, object.getLexicalForm(), new Datatype(object.getDatatypeURI()))); + } + + return negExamples; + } else { + throw new UnsupportedOperationException("Getting positive examples is not possible."); + } + } + public static void main(String[] args) throws Exception{ DisjointDataPropertyAxiomLearner l = new DisjointDataPropertyAxiomLearner(new SparqlEndpointKS(SparqlEndpoint.getEndpointDBpediaLiveAKSW())); l.setPropertyToDescribe(new DatatypeProperty("http://dbpedia.org/ontology/accessDate")); Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/properties/DisjointObjectPropertyAxiomLearner.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/properties/DisjointObjectPropertyAxiomLearner.java 2012-09-27 16:33:12 UTC (rev 3853) +++ 
trunk/components-core/src/main/java/org/dllearner/algorithms/properties/DisjointObjectPropertyAxiomLearner.java 2012-10-05 13:27:40 UTC (rev 3854) @@ -21,6 +21,7 @@ import java.util.ArrayList; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -33,7 +34,10 @@ import org.dllearner.core.config.ConfigOption; import org.dllearner.core.config.ObjectPropertyEditor; import org.dllearner.core.owl.DisjointObjectPropertyAxiom; +import org.dllearner.core.owl.Individual; +import org.dllearner.core.owl.KBElement; import org.dllearner.core.owl.ObjectProperty; +import org.dllearner.core.owl.ObjectPropertyAssertion; import org.dllearner.kb.LocalModelBasedSparqlEndpointKS; import org.dllearner.kb.SparqlEndpointKS; import org.dllearner.kb.sparql.SPARQLTasks; @@ -42,6 +46,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.hp.hpl.jena.query.ParameterizedSparqlString; import com.hp.hpl.jena.query.QuerySolution; import com.hp.hpl.jena.query.ResultSet; import com.hp.hpl.jena.rdf.model.Model; @@ -64,6 +69,9 @@ public DisjointObjectPropertyAxiomLearner(SparqlEndpointKS ks){ this.ks = ks; + + super.posExamplesQueryTemplate = new ParameterizedSparqlString("SELECT DISTINCT ?s ?o WHERE {?s ?p1 ?o. FILTER NOT EXISTS{?s ?p ?o}}"); + super.negExamplesQueryTemplate = new ParameterizedSparqlString("SELECT DISTINCT ?s ?o WHERE {?s ?p ?o. }"); } public ObjectProperty getPropertyToDescribe() { @@ -95,7 +103,8 @@ allObjectProperties.remove(propertyToDescribe); if(!forceSPARQL_1_0_Mode && ks.supportsSPARQL_1_1()){ - runSPARQL1_1_Mode(); +// runSPARQL1_1_Mode(); + runSingleQueryMode(); } else { runSPARQL1_0_Mode(); } @@ -103,21 +112,53 @@ logger.info("...finished in {}ms.", (System.currentTimeMillis()-startTime)); } + private void runSingleQueryMode(){ + //compute the overlap if exist + Map<ObjectProperty, Integer> property2Overlap = new HashMap<ObjectProperty, Integer>(); + String query = String.format("SELECT ?p (COUNT(*) AS ?cnt) WHERE {?s <%s> ?o. ?s ?p ?o.} GROUP BY ?p", propertyToDescribe.getName()); + ResultSet rs = executeSelectQuery(query); + QuerySolution qs; + while(rs.hasNext()){ + qs = rs.next(); + ObjectProperty prop = new ObjectProperty(qs.getResource("p").getURI()); + int cnt = qs.getLiteral("cnt").getInt(); + property2Overlap.put(prop, cnt); + } + //for each property in knowledge base + for(ObjectProperty p : allObjectProperties){ + //get the popularity + int otherPopularity = reasoner.getPopularity(p); + if(otherPopularity == 0){//skip empty properties + continue; + } + //get the overlap + int overlap = property2Overlap.containsKey(p) ? property2Overlap.get(p) : 0; + //compute the estimated precision + double precision = accuracy(otherPopularity, overlap); + //compute the estimated recall + double recall = accuracy(popularity, overlap); + //compute the final score + double score = 1 - fMEasure(precision, recall); + + currentlyBestAxioms.add(new EvaluatedAxiom(new DisjointObjectPropertyAxiom(propertyToDescribe, p), new AxiomScore(score))); + } + } + private void runSPARQL1_0_Mode() { - Model model = ModelFactory.createDefaultModel(); + workingModel = ModelFactory.createDefaultModel(); int limit = 1000; int offset = 0; String baseQuery = "CONSTRUCT {?s ?p ?o.} WHERE {?s <%s> ?o. 
?s ?p ?o.} LIMIT %d OFFSET %d"; + String countQuery = "SELECT ?p (COUNT(?s) AS ?count) WHERE {?s ?p ?o.} GROUP BY ?p"; String query = String.format(baseQuery, propertyToDescribe.getName(), limit, offset); Model newModel = executeConstructQuery(query); Map<ObjectProperty, Integer> result = new HashMap<ObjectProperty, Integer>(); while(!terminationCriteriaSatisfied() && newModel.size() != 0){ - model.add(newModel); - query = "SELECT ?p (COUNT(?s) AS ?count) WHERE {?s ?p ?o.} GROUP BY ?p"; + workingModel.add(newModel); ObjectProperty prop; Integer oldCnt; - ResultSet rs = executeSelectQuery(query, model); + ResultSet rs = executeSelectQuery(countQuery, workingModel); QuerySolution qs; while(rs.hasNext()){ qs = rs.next(); @@ -134,7 +175,6 @@ currentlyBestAxioms = buildAxioms(result, allObjectProperties); } - offset += limit; query = String.format(baseQuery, propertyToDescribe.getName(), limit, offset); newModel = executeConstructQuery(query); @@ -247,6 +287,54 @@ return axioms; } + @Override + public Set<KBElement> getPositiveExamples(EvaluatedAxiom evAxiom) { + DisjointObjectPropertyAxiom axiom = (DisjointObjectPropertyAxiom) evAxiom.getAxiom(); + posExamplesQueryTemplate.setIri("p", axiom.getDisjointRole().getName()); + if(workingModel != null){ + Set<KBElement> posExamples = new HashSet<KBElement>(); + + ResultSet rs = executeSelectQuery(posExamplesQueryTemplate.toString(), workingModel); + Individual subject; + Individual object; + QuerySolution qs; + while(rs.hasNext()){ + qs = rs.next(); + subject = new Individual(qs.getResource("s").getURI()); + object = new Individual(qs.getResource("o").getURI()); + posExamples.add(new ObjectPropertyAssertion(propertyToDescribe, subject, object)); + } + + return posExamples; + } else { + throw new UnsupportedOperationException("Getting positive examples is not possible."); + } + } + + @Override + public Set<KBElement> getNegativeExamples(EvaluatedAxiom evAxiom) { + DisjointObjectPropertyAxiom axiom = (DisjointObjectPropertyAxiom) evAxiom.getAxiom(); + negExamplesQueryTemplate.setIri("p", axiom.getDisjointRole().getName()); + if(workingModel != null){ + Set<KBElement> negExamples = new TreeSet<KBElement>(); + + ResultSet rs = executeSelectQuery(negExamplesQueryTemplate.toString(), workingModel); + Individual subject; + Individual object; + QuerySolution qs; + while(rs.hasNext()){ + qs = rs.next(); + subject = new Individual(qs.getResource("s").getURI()); + object = new Individual(qs.getResource("o").getURI()); + negExamples.add(new ObjectPropertyAssertion(propertyToDescribe, subject, object)); + } + + return negExamples; + } else { + throw new UnsupportedOperationException("Getting positive examples is not possible."); + } + } + public static void main(String[] args) throws Exception{ SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpedia(); // endpoint = new SparqlEndpoint(new URL("http://dbpedia.aksw.org:8902/sparql"), Collections.singletonList("http://dbpedia.org"), Collections.<String>emptyList())); Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/properties/EquivalentDataPropertyAxiomLearner.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/properties/EquivalentDataPropertyAxiomLearner.java 2012-09-27 16:33:12 UTC (rev 3853) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/properties/EquivalentDataPropertyAxiomLearner.java 2012-10-05 13:27:40 UTC (rev 3854) @@ -20,10 +20,8 @@ package 
org.dllearner.algorithms.properties; import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; +import java.util.HashSet; +import java.util.Set; import java.util.SortedSet; import org.dllearner.core.AbstractAxiomLearningAlgorithm; @@ -31,16 +29,21 @@ import org.dllearner.core.EvaluatedAxiom; import org.dllearner.core.config.ConfigOption; import org.dllearner.core.config.DataPropertyEditor; +import org.dllearner.core.owl.Datatype; import org.dllearner.core.owl.DatatypeProperty; import org.dllearner.core.owl.EquivalentDatatypePropertiesAxiom; +import org.dllearner.core.owl.GenericDatatypePropertyAssertion; +import org.dllearner.core.owl.Individual; +import org.dllearner.core.owl.KBElement; import org.dllearner.kb.SparqlEndpointKS; import org.dllearner.kb.sparql.SparqlEndpoint; -import org.dllearner.learningproblems.AxiomScore; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.hp.hpl.jena.query.ParameterizedSparqlString; import com.hp.hpl.jena.query.QuerySolution; import com.hp.hpl.jena.query.ResultSet; +import com.hp.hpl.jena.rdf.model.Literal; import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.ModelFactory; @@ -54,6 +57,9 @@ public EquivalentDataPropertyAxiomLearner(SparqlEndpointKS ks){ this.ks = ks; + super.posExamplesQueryTemplate = new ParameterizedSparqlString("SELECT DISTINCT ?s ?o WHERE {?s ?p ?o}"); + super.negExamplesQueryTemplate = new ParameterizedSparqlString("SELECT DISTINCT ?s ?o WHERE {?s ?p1 ?o. FILTER NOT EXISTS{?s ?p ?o}}"); + } public DatatypeProperty getPropertyToDescribe() { @@ -70,115 +76,131 @@ startTime = System.currentTimeMillis(); fetchedRows = 0; currentlyBestAxioms = new ArrayList<EvaluatedAxiom>(); - //get existing super properties - SortedSet<DatatypeProperty> existingSuperProperties = reasoner.getSuperProperties(propertyToDescribe); - logger.debug("Existing super properties: " + existingSuperProperties); + if(returnOnlyNewAxioms){ + //get existing domains + SortedSet<DatatypeProperty> existingSuperProperties = reasoner.getEquivalentProperties(propertyToDescribe); + if(existingSuperProperties != null && !existingSuperProperties.isEmpty()){ + for(DatatypeProperty supProp : existingSuperProperties){ + existingAxioms.add(new EquivalentDatatypePropertiesAxiom(propertyToDescribe, supProp)); + } + } + } + if(!forceSPARQL_1_0_Mode && ks.supportsSPARQL_1_1()){ - runSPARQL1_1_Mode(); + runSingleQueryMode(); } else { runSPARQL1_0_Mode(); } - logger.info("...finished in {}ms.", (System.currentTimeMillis()-startTime)); } + private void runSingleQueryMode(){ + int total = reasoner.getPopularity(propertyToDescribe); + + if(total > 0){ + String query = String.format("SELECT ?p (COUNT(*) AS ?cnt) WHERE {?s <%s> ?o. ?s ?p ?o.} GROUP BY ?p", propertyToDescribe.getName()); + ResultSet rs = executeSelectQuery(query); + QuerySolution qs; + while(rs.hasNext()){ + qs = rs.next(); + DatatypeProperty prop = new DatatypeProperty(qs.getResource("p").getURI()); + int cnt = qs.getLiteral("cnt").getInt(); + if(!prop.equals(propertyToDescribe)){ + currentlyBestAxioms.add(new EvaluatedAxiom(new EquivalentDatatypePropertiesAxiom(propertyToDescribe, prop), computeScore(total, cnt))); + + } + } + } + } + private void runSPARQL1_0_Mode() { - Model model = ModelFactory.createDefaultModel(); + workingModel = ModelFactory.createDefaultModel(); int limit = 1000; int offset = 0; String baseQuery = "CONSTRUCT {?s ?p ?o.} WHERE {?s <%s> ?o. 
?s ?p ?o.} LIMIT %d OFFSET %d"; String query = String.format(baseQuery, propertyToDescribe.getName(), limit, offset); Model newModel = executeConstructQuery(query); - Map<DatatypeProperty, Integer> result = new HashMap<DatatypeProperty, Integer>(); while(!terminationCriteriaSatisfied() && newModel.size() != 0){ - model.add(newModel); - query = "SELECT ?p (COUNT(?s) AS ?count) WHERE {?s ?p ?o.} GROUP BY ?p"; + workingModel.add(newModel); + // get number of triples + int all = (int)workingModel.size(); - DatatypeProperty prop; - Integer oldCnt; - ResultSet rs = executeSelectQuery(query, model); - QuerySolution qs; - while(rs.hasNext()){ - qs = rs.next(); - prop = new DatatypeProperty(qs.getResource("p").getURI()); - int newCnt = qs.getLiteral("count").getInt(); - oldCnt = result.get(prop); - if(oldCnt == null){ - oldCnt = Integer.valueOf(newCnt); + if (all > 0) { + // get class and number of instances + query = "SELECT ?p (COUNT(*) AS ?cnt) WHERE {?s ?p ?o.} GROUP BY ?p ORDER BY DESC(?cnt)"; + ResultSet rs = executeSelectQuery(query, workingModel); + + currentlyBestAxioms.clear(); + QuerySolution qs; + DatatypeProperty prop; + while(rs.hasNext()){ + qs = rs.next(); + prop = new DatatypeProperty(qs.get("p").asResource().getURI()); + //omit property to describe as it is trivial + if(prop.equals(propertyToDescribe)){ + continue; + } + currentlyBestAxioms.add(new EvaluatedAxiom( + new EquivalentDatatypePropertiesAxiom(propertyToDescribe, prop), + computeScore(all, qs.get("cnt").asLiteral().getInt()))); } - result.put(prop, oldCnt); - qs.getLiteral("count").getInt(); + } - if(!result.isEmpty()){ - currentlyBestAxioms = buildAxioms(result); - } - - offset += limit; query = String.format(baseQuery, propertyToDescribe.getName(), limit, offset); newModel = executeConstructQuery(query); } - } - private void runSPARQL1_1_Mode() { - // get subjects with types - int limit = 1000; - int offset = 0; - String queryTemplate = "PREFIX owl: <http://www.w3.org/2002/07/owl#> SELECT ?p COUNT(?s) AS ?count WHERE {?s ?p ?o.?p a owl:DatatypeProperty." 
- + "{SELECT ?s ?o WHERE {?s <%s> ?o.} LIMIT %d OFFSET %d}" + "}"; - String query; - Map<DatatypeProperty, Integer> result = new HashMap<DatatypeProperty, Integer>(); - DatatypeProperty prop; - Integer oldCnt; - boolean repeat = true; - - while (!terminationCriteriaSatisfied() && repeat) { - query = String.format(queryTemplate, propertyToDescribe, limit, - offset); - ResultSet rs = executeSelectQuery(query); + @Override + public Set<KBElement> getPositiveExamples(EvaluatedAxiom evAxiom) { + EquivalentDatatypePropertiesAxiom axiom = (EquivalentDatatypePropertiesAxiom) evAxiom.getAxiom(); + posExamplesQueryTemplate.setIri("p", axiom.getRole().toString()); + if(workingModel != null){ + Set<KBElement> posExamples = new HashSet<KBElement>(); + + ResultSet rs = executeSelectQuery(posExamplesQueryTemplate.toString(), workingModel); + Individual subject; + Literal object; QuerySolution qs; - repeat = false; - while (rs.hasNext()) { + while(rs.hasNext()){ qs = rs.next(); - prop = new DatatypeProperty(qs.getResource("p").getURI()); - int newCnt = qs.getLiteral("count").getInt(); - oldCnt = result.get(prop); - if(oldCnt == null){ - oldCnt = Integer.valueOf(newCnt); - } else { - oldCnt += newCnt; - } - result.put(prop, oldCnt); - qs.getLiteral("count").getInt(); - repeat = true; + subject = new Individual(qs.getResource("s").getURI()); + object = qs.getLiteral("o"); + posExamples.add(new GenericDatatypePropertyAssertion( + propertyToDescribe, subject, object.getLexicalForm(), new Datatype(object.getDatatypeURI()))); } - if (!result.isEmpty()) { - currentlyBestAxioms = buildAxioms(result); - offset += 1000; - } - + + return posExamples; + } else { + throw new UnsupportedOperationException("Getting positive examples is not possible."); } - } - - - private List<EvaluatedAxiom> buildAxioms(Map<DatatypeProperty, Integer> property2Count){ - List<EvaluatedAxiom> axioms = new ArrayList<EvaluatedAxiom>(); - Integer all = property2Count.get(propertyToDescribe); - property2Count.remove(propertyToDescribe); - - EvaluatedAxiom evalAxiom; - for(Entry<DatatypeProperty, Integer> entry : sortByValues(property2Count)){ - evalAxiom = new EvaluatedAxiom(new EquivalentDatatypePropertiesAxiom(propertyToDescribe, entry.getKey()), - new AxiomScore(entry.getValue() / (double)all)); - axioms.add(evalAxiom); + @Override + public Set<KBElement> getNegativeExamples(EvaluatedAxiom evAxiom) { + EquivalentDatatypePropertiesAxiom axiom = (EquivalentDatatypePropertiesAxiom) evAxiom.getAxiom(); + negExamplesQueryTemplate.setIri("p", axiom.getRole().toString()); + if(workingModel != null){ + Set<KBElement> negExamples = new HashSet<KBElement>(); + + ResultSet rs = executeSelectQuery(negExamplesQueryTemplate.toString(), workingModel); + Individual subject; + Literal object; + QuerySolution qs; + while(rs.hasNext()){ + qs = rs.next(); + subject = new Individual(qs.getResource("s").getURI()); + object = qs.getLiteral("o"); + negExamples.add(new GenericDatatypePropertyAssertion( + propertyToDescribe, subject, object.getLexicalForm(), new Datatype(object.getDatatypeURI()))); + } + + return negExamples; + } else { + throw new UnsupportedOperationException("Getting positive examples is not possible."); } - - property2Count.put(propertyToDescribe, all); - return axioms; } public static void main(String[] args) throws Exception{ Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/properties/EquivalentObjectPropertyAxiomLearner.java =================================================================== --- 
trunk/components-core/src/main/java/org/dllearner/algorithms/properties/EquivalentObjectPropertyAxiomLearner.java 2012-09-27 16:33:12 UTC (rev 3853) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/properties/EquivalentObjectPropertyAxiomLearner.java 2012-10-05 13:27:40 UTC (rev 3854) @@ -22,27 +22,27 @@ import java.net.URL; import java.util.ArrayList; import java.util.Collections; -import java.util.HashMap; import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; import java.util.Set; import java.util.SortedSet; +import java.util.TreeSet; import org.dllearner.core.AbstractAxiomLearningAlgorithm; import org.dllearner.core.ComponentAnn; import org.dllearner.core.EvaluatedAxiom; -import org.dllearner.core.Score; import org.dllearner.core.config.ConfigOption; import org.dllearner.core.config.ObjectPropertyEditor; import org.dllearner.core.owl.EquivalentObjectPropertiesAxiom; +import org.dllearner.core.owl.Individual; +import org.dllearner.core.owl.KBElement; import org.dllearner.core.owl.ObjectProperty; +import org.dllearner.core.owl.ObjectPropertyAssertion; import org.dllearner.kb.SparqlEndpointKS; import org.dllearner.kb.sparql.SparqlEndpoint; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.hp.hpl.jena.query.ParameterizedSparqlString; import com.hp.hpl.jena.query.QuerySolution; import com.hp.hpl.jena.query.ResultSet; import com.hp.hpl.jena.rdf.model.Model; @@ -58,6 +58,9 @@ public EquivalentObjectPropertyAxiomLearner(SparqlEndpointKS ks){ this.ks = ks; + super.posExamplesQueryTemplate = new ParameterizedSparqlString("SELECT DISTINCT ?s ?o WHERE {?s ?p ?o}"); + super.negExamplesQueryTemplate = new ParameterizedSparqlString("SELECT DISTINCT ?s ?o WHERE {?s ?p1 ?o. FILTER NOT EXISTS{?s ?p ?o}}"); + } public ObjectProperty getPropertyToDescribe() { @@ -74,119 +77,128 @@ startTime = System.currentTimeMillis(); fetchedRows = 0; currentlyBestAxioms = new ArrayList<EvaluatedAxiom>(); - //get existing super properties - SortedSet<ObjectProperty> existingSuperProperties = reasoner.getSuperProperties(propertyToDescribe); - logger.debug("Existing super properties: " + existingSuperProperties); + if(returnOnlyNewAxioms){ + //get existing domains + SortedSet<ObjectProperty> existingEquivalentProperties = reasoner.getEquivalentProperties(propertyToDescribe); + if(existingEquivalentProperties != null && !existingEquivalentProperties.isEmpty()){ + for(Obje... [truncated message content] |
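The diff above boils the equivalence heuristic down to a single aggregation: within a working model of sampled triples for the property under inspection, every other predicate is scored by how large a fraction of those triples it covers. Below is a minimal, self-contained sketch of that ranking step. It assumes plain Jena (the same com.hp.hpl.jena API the code imports); the class and method names are hypothetical, not DL-Learner's, and the raw fraction cnt/all is only a stand-in for the learner's computeScore(all, cnt), whose internals this diff does not show.

import com.hp.hpl.jena.query.QueryExecution;
import com.hp.hpl.jena.query.QueryExecutionFactory;
import com.hp.hpl.jena.query.QuerySolution;
import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.rdf.model.Model;

import java.util.LinkedHashMap;
import java.util.Map;

public class EquivalenceCandidateSketch {

    /** Returns predicate URI -> score in [0,1], i.e. the fraction of the
     *  sampled triples in workingModel that each other predicate covers. */
    public static Map<String, Double> rankCandidates(Model workingModel, String propertyUri) {
        long all = workingModel.size(); // total number of sampled triples
        Map<String, Double> scores = new LinkedHashMap<String, Double>();
        if (all == 0) {
            return scores;
        }
        // same aggregation as in the diff: group the working model by predicate
        String query = "SELECT ?p (COUNT(*) AS ?cnt) WHERE {?s ?p ?o.} "
                     + "GROUP BY ?p ORDER BY DESC(?cnt)";
        QueryExecution qe = QueryExecutionFactory.create(query, workingModel);
        try {
            ResultSet rs = qe.execSelect();
            while (rs.hasNext()) {
                QuerySolution qs = rs.next();
                String p = qs.getResource("p").getURI();
                if (p.equals(propertyUri)) {
                    continue; // the property itself is a trivial candidate
                }
                int cnt = qs.getLiteral("cnt").getInt();
                scores.put(p, cnt / (double) all);
            }
        } finally {
            qe.close();
        }
        return scores;
    }
}

Calling rankCandidates after each CONSTRUCT page has been added to the working model mirrors the loop in the diff, which rebuilds currentlyBestAxioms from the same grouped counts on every iteration.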
From: <ki...@us...> - 2012-09-27 16:33:19

Revision: 3853 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3853&view=rev Author: kirdie Date: 2012-09-27 16:33:12 +0000 (Thu, 27 Sep 2012) Log Message: ----------- reintegrated the old approach to the learner2. Modified Paths: -------------- branches/hmm/components-core/src/main/java/org/dllearner/kb/sparql/ExtractionDBCache.java branches/hmm/components-ext/pom.xml branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/QueryTestData.java branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java Modified: branches/hmm/components-core/src/main/java/org/dllearner/kb/sparql/ExtractionDBCache.java =================================================================== --- branches/hmm/components-core/src/main/java/org/dllearner/kb/sparql/ExtractionDBCache.java 2012-09-27 13:18:05 UTC (rev 3852) +++ branches/hmm/components-core/src/main/java/org/dllearner/kb/sparql/ExtractionDBCache.java 2012-09-27 16:33:12 UTC (rev 3853) @@ -31,9 +31,7 @@ import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; - import org.dllearner.utilities.Helper; - import com.hp.hpl.jena.query.ResultSetFactory; import com.hp.hpl.jena.query.ResultSetRewindable; import com.hp.hpl.jena.rdf.model.Model; Modified: branches/hmm/components-ext/pom.xml =================================================================== --- branches/hmm/components-ext/pom.xml 2012-09-27 13:18:05 UTC (rev 3852) +++ branches/hmm/components-ext/pom.xml 2012-09-27 16:33:12 UTC (rev 3853) @@ -34,10 +34,10 @@ <groupId>com.jamonapi</groupId> <artifactId>jamon</artifactId> </dependency> - <dependency> + <!-- <dependency> <groupId>org.aksw.commons</groupId> <artifactId>sparql</artifactId> - </dependency> + </dependency> --> <dependency> <groupId>org.apache.solr</groupId> Modified: branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-09-27 13:18:05 UTC (rev 3852) +++ branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-09-27 16:33:12 UTC (rev 3853) @@ -9,15 +9,20 @@ import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; +import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; -import java.util.SortedMap; import java.util.SortedSet; import java.util.TreeMap; import java.util.TreeSet; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; import org.apache.commons.collections15.MultiMap; import org.apache.log4j.Logger; import org.dllearner.algorithm.tbsl.nlp.Lemmatizer; @@ -49,7 +54,6 @@ import org.dllearner.common.index.IndexResultItem; import org.dllearner.common.index.IndexResultSet; import org.dllearner.common.index.MappingBasedIndex; -import org.dllearner.common.index.SOLRIndex; import org.dllearner.common.index.SPARQLDatatypePropertiesIndex; import org.dllearner.common.index.SPARQLIndex; import 
org.dllearner.common.index.SPARQLObjectPropertiesIndex; @@ -70,7 +74,6 @@ import org.dllearner.kb.sparql.SparqlEndpoint; import org.dllearner.kb.sparql.SparqlQuery; import org.dllearner.reasoning.SPARQLReasoner; -import org.ini4j.InvalidFileFormatException; import org.ini4j.Options; import org.semanticweb.owlapi.model.IRI; import org.semanticweb.owlapi.util.SimpleIRIShortFormProvider; @@ -83,10 +86,6 @@ import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.ModelFactory; import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP; -import com.hp.hpl.jena.sparql.expr.ExprAggregator; -import com.hp.hpl.jena.sparql.expr.ExprVar; -import com.hp.hpl.jena.sparql.expr.aggregate.AggCount; -import com.hp.hpl.jena.sparql.expr.aggregate.Aggregator; import com.jamonapi.Monitor; import com.jamonapi.MonitorFactory; @@ -95,19 +94,18 @@ * */ public class SPARQLTemplateBasedLearner2 implements SparqlQueryLearningAlgorithm { - public static boolean useHMM = true; - + private static final boolean USE_HMM = false; + /** synonyms are great but are not used yet by the HMM algorithm. **/ + private static final boolean HMM_USE_SYNONYMS = false; + /** The minimum score of items that are accepted from the Sindice search BOA index. **/ + private static final Double BOA_THRESHOLD = 0.9; enum Mode {BEST_QUERY, BEST_NON_EMPTY_QUERY} private Mode mode = Mode.BEST_QUERY; /** used to create a label out of the URI when there is no label available in the SPARQL endpoint.*/ private static SimpleIRIShortFormProvider sfp = new SimpleIRIShortFormProvider(); + private static final Logger logger = Logger.getLogger(SPARQLTemplateBasedLearner2.class); - private static final Logger logger = Logger.getLogger(SPARQLTemplateBasedLearner2.class); - /** synonyms are great but are not used yet by the HMM algorithm. **/ - private static final boolean CREATE_SYNONYMS = false; - /** The minimum score of items that are accepted from the Sindice search BOA index. 
**/ - private static final Double BOA_THRESHOLD = 0.9; private Monitor templateMon = MonitorFactory.getTimeMonitor("template"); private Monitor sparqlMon = MonitorFactory.getTimeMonitor("sparql"); @@ -403,7 +401,7 @@ logger.debug("Generating SPARQL query templates..."); templateMon.start(); if(multiThreaded){ - templates = templateGenerator.buildTemplatesMultiThreaded(question,CREATE_SYNONYMS); + templates = templateGenerator.buildTemplatesMultiThreaded(question,!USE_HMM||HMM_USE_SYNONYMS); } else { templates = templateGenerator.buildTemplates(question); } @@ -420,7 +418,7 @@ } //get the weighted query candidates - generatedQueries = getWeightedSPARQLQueries(templates); + generatedQueries = getWeightedSPARQLQueries(templates,USE_HMM); sparqlQueryCandidates = new ArrayList<WeightedQuery>(); int i = 0; for(WeightedQuery wQ : generatedQueries){ @@ -519,13 +517,15 @@ } - public Set<String> getRelevantKeywords(){ - return relevantKeywords; - } + public Set<String> getRelevantKeywords(){return relevantKeywords;} - // just for testing the HMM integration, getWeightedSPARQLQueriesOld is the original one - private SortedSet<WeightedQuery> getWeightedSPARQLQueries(Set<Template> templates) + private SortedSet<WeightedQuery> getWeightedSPARQLQueries(Set<Template> templates, boolean hmm) { + return hmm?getWeightedSPARQLQueriesWithHMM(templates):getWeightedSPARQLQueriesWithoutHMM(templates); + } + + private SortedSet<WeightedQuery> getWeightedSPARQLQueriesWithHMM(Set<Template> templates) + { // for testing for(Template template: templates) { @@ -621,7 +621,7 @@ return null; } - private SortedSet<WeightedQuery> getWeightedSPARQLQueriesOld(Set<Template> templates){ + private SortedSet<WeightedQuery> getWeightedSPARQLQueriesWithoutHMM(Set<Template> templates){ logger.debug("Generating SPARQL query candidates..."); Map<Slot, Set<Allocation>> slot2Allocations = new TreeMap<Slot, Set<Allocation>>(new Comparator<Slot>() { @@ -1295,6 +1295,7 @@ } return indexResultItems; } + class SlotProcessor implements Callable<Map<Slot, SortedSet<Allocation>>>{ private Slot slot; Modified: branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/QueryTestData.java =================================================================== --- branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/QueryTestData.java 2012-09-27 13:18:05 UTC (rev 3852) +++ branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/QueryTestData.java 2012-09-27 16:33:12 UTC (rev 3853) @@ -11,7 +11,6 @@ import java.io.ObjectOutputStream; import java.io.PrintWriter; import java.io.Serializable; -import java.io.StringWriter; import java.util.Collection; import java.util.HashSet; import java.util.Set; @@ -37,11 +36,11 @@ public class QueryTestData implements Serializable { + private static final long serialVersionUID = 1L; public SortedMap<Integer, String> id2Question = new ConcurrentSkipListMap<Integer, String>(); public SortedMap<Integer, String> id2Query = new ConcurrentSkipListMap<Integer, String>(); public SortedMap<Integer, Set<String>> id2Answers = new ConcurrentSkipListMap<Integer, Set<String>>(); public SortedMap<Integer, LearnStatus> id2LearnStatus = new ConcurrentSkipListMap<Integer, LearnStatus>(); - private static final int MAXIMUM_QUESTIONS = Integer.MAX_VALUE; private static final String persistancePath = "cache/"+SPARQLTemplateBasedLearner3Test.class.getSimpleName()+'/'+QueryTestData.class.getSimpleName(); @@ -85,8 +84,9 @@ /** reads test data from a QALD2 benchmark XML 
file, including questions, queries and answers. * each question needs to have a query but not necessarily an answer. * @param file a QALD benchmark XML file + * @param MAX_NUMBER_OF_QUESTIONS the maximum number of questions read from the file. * @return the test data read from the XML file */ - public static QueryTestData readQaldXml(final File file) + public static QueryTestData readQaldXml(final File file, int MAX_NUMBER_OF_QUESTIONS) { QueryTestData testData = new QueryTestData(); try { @@ -99,7 +99,7 @@ for(int i = 0; i < questionNodes.getLength(); i++) { - if(i>=MAXIMUM_QUESTIONS) break; // TODO: remove later? + if(i>MAX_NUMBER_OF_QUESTIONS) break; String question; String query; Set<String> answers = new HashSet<String>(); Modified: branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java =================================================================== --- branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-09-27 13:18:05 UTC (rev 3852) +++ branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-09-27 16:33:12 UTC (rev 3853) @@ -55,7 +55,6 @@ import org.dllearner.algorithm.tbsl.nlp.WordNet; import org.dllearner.algorithm.tbsl.templator.Templator; import org.dllearner.algorithm.tbsl.util.Knowledgebase; -import org.dllearner.common.index.HierarchicalIndex; import org.dllearner.common.index.Index; import org.dllearner.common.index.MappingBasedIndex; import org.dllearner.common.index.SOLRIndex; @@ -104,6 +103,7 @@ private static final File evaluationFolder = new File("cache/evaluation"); private static final boolean DBPEDIA_PRETAGGED = true; private static final boolean OXFORD_PRETAGGED = false; + private static final int MAX_NUMBER_OF_QUESTIONS = 10; @Test public void testDBpedia() throws Exception { @@ -120,28 +120,28 @@ test("Oxford 19 working questions", file,null,null,null,loadOxfordModel(),getOxfordMappingIndex()); } -// /*@Test*/ public void testOxford() throws Exception -// { -// Model model = loadOxfordModel(); -// QueryTestData testData = QueryTestData.readQaldXml(new File("log/oxford_working_questions.xml")); -// // answers are not included at least in the first query TODO: check, why -// testData.generateAnswers(null, null, model); -// QueryTestData newTestData = generateTestDataMultiThreaded(testData.id2Question, null, model,getOxfordMappingIndex() , OXFORD_PRETAGGED); -// newTestData.generateAnswers(null, null, model); -// for(int i : testData.id2Question.keySet()) -// { -// logger.info("Comparing answers for question "+testData.id2Question.get(i)); -// String referenceQuery = testData.id2Query.get(i); -// String newQuery = newTestData.id2Query.get(i); -// if(!referenceQuery.equals(newQuery)) -// { -// logger.warn("not equal, reference query: "+referenceQuery+", new query: "+newQuery); -// Collection<String> referenceAnswers = testData.id2Answers.get(i); -// Collection<String> newAnswers = newTestData.id2Answers.get(i); -// if(!referenceAnswers.equals(newAnswers)) fail("not equal, reference answers: "+referenceAnswers+", new answers: "+newAnswers); -// } -// } -// } + // /*@Test*/ public void testOxford() throws Exception + // { + // Model model = loadOxfordModel(); + // QueryTestData testData = QueryTestData.readQaldXml(new File("log/oxford_working_questions.xml")); + // // answers are not included at least in the first query TODO: check, why + // testData.generateAnswers(null, 
null, model); + // QueryTestData newTestData = generateTestDataMultiThreaded(testData.id2Question, null, model,getOxfordMappingIndex() , OXFORD_PRETAGGED); + // newTestData.generateAnswers(null, null, model); + // for(int i : testData.id2Question.keySet()) + // { + // logger.info("Comparing answers for question "+testData.id2Question.get(i)); + // String referenceQuery = testData.id2Query.get(i); + // String newQuery = newTestData.id2Query.get(i); + // if(!referenceQuery.equals(newQuery)) + // { + // logger.warn("not equal, reference query: "+referenceQuery+", new query: "+newQuery); + // Collection<String> referenceAnswers = testData.id2Answers.get(i); + // Collection<String> newAnswers = newTestData.id2Answers.get(i); + // if(!referenceAnswers.equals(newAnswers)) fail("not equal, reference answers: "+referenceAnswers+", new answers: "+newAnswers); + // } + // } + // } /** For debugging one question in particular. */ @@ -164,23 +164,23 @@ */ /*@Test*/ public void testSingleQueryDBpedia() { -// Logger.getLogger(Templator.class).setLevel(Level.DEBUG); -// Logger.getLogger(Parser.class).setLevel(Level.DEBUG); -// Logger.getLogger(SPARQLTemplateBasedLearner2.class).setLevel(Level.DEBUG); + // Logger.getLogger(Templator.class).setLevel(Level.DEBUG); + // Logger.getLogger(Parser.class).setLevel(Level.DEBUG); + // Logger.getLogger(SPARQLTemplateBasedLearner2.class).setLevel(Level.DEBUG); // String question = "houses for less than 900000 pounds"; String question = "Give/VB me/PRP all/DT video/JJ games/NNS published/VBN by/IN Mean/NNP Hamster/NNP Software/NNP"; -// String question = "give me all video games published by mean hamster software"; -// String question = "Give me all video games published by Mean Hamster Software"; -// question = new StanfordPartOfSpeechTagger().tag(question); -// System.out.println(question); + // String question = "give me all video games published by mean hamster software"; + // String question = "Give me all video games published by Mean Hamster Software"; + // question = new StanfordPartOfSpeechTagger().tag(question); + // System.out.println(question); -// Model model = loadOxfordModel(); + // Model model = loadOxfordModel(); QueryTestData testData = new QueryTestData(); new LearnQueryCallable(question, 0, testData, dbpediaLiveKnowledgebase, true).call(); logger.info("learned query: "+testData.id2Query.get(0)); } - - /*@Test*/ public void generateXMLOxford() throws IOException + + /*@Test*/ public void generateXMLOxford() throws IOException { boolean ADD_POS_TAGS = true; PartOfSpeechTagger posTagger = null; @@ -192,7 +192,7 @@ for(String line;(line=in.readLine())!=null;) { j++; - // if(j>5) break; // TODO: remove later + if(j>5) break; // TODO: remove later String question = line.replace("question: ", "").trim(); if(ADD_POS_TAGS&&!OXFORD_PRETAGGED) {question = posTagger.tag(question);} if(!line.trim().isEmpty()) {questions.add(question);} @@ -291,7 +291,7 @@ public void test(String title, final File referenceXML,final SparqlEndpoint endpoint,ExtractionDBCache cache,Knowledgebase kb, Model model, MappingBasedIndex index) throws ParserConfigurationException, SAXException, IOException, TransformerException, ComponentInitException, NoTemplateFoundException - { + { evaluateAndWrite(title,referenceXML,endpoint,cache,kb,model,index); generateHTML(title); @@ -318,7 +318,7 @@ logger.info("Old test data not loadable, creating it and exiting."); } learnedTestData.write();*/ - } + } private File generateTestDataIfNecessary(final File referenceXML,final SparqlEndpoint 
endpoint,ExtractionDBCache cache) throws ParserConfigurationException, SAXException, IOException, TransformerException { @@ -335,9 +335,10 @@ } private void evaluateAndWrite(String title,final File updatedReferenceXML, final SparqlEndpoint endpoint,ExtractionDBCache cache, - Knowledgebase kb, Model model, MappingBasedIndex index) + Knowledgebase kb, Model model, MappingBasedIndex index) { - QueryTestData referenceTestData = QueryTestData.readQaldXml(updatedReferenceXML); + + QueryTestData referenceTestData = QueryTestData.readQaldXml(updatedReferenceXML,MAX_NUMBER_OF_QUESTIONS); logger.info(title+" subset loaded with "+referenceTestData.id2Question.size()+" questions."); long startLearning = System.currentTimeMillis(); @@ -352,11 +353,6 @@ evaluation.write(); } - private void evaluateAndWrite() - { - - } - /** evaluates a data set against a reference. * @param reference the test data assumed to be correct. needs to contain the answers for all queries. * @param suspect the test data to compare with the reference. @@ -673,8 +669,8 @@ // try {testData.id2Answers.put(i,getUris(endpoint, learnedQuery));} // catch(Exception e) {logger.warn("Error with learned query "+learnedQuery+" for question "+question+" at endpoint "+endpoint+": "+e.getLocalizedMessage());} - long end = System.currentTimeMillis(); - // logger.debug(String.format("Generated query \"%s\" after %d ms", learnedQuery,end-start)); + // long end = System.currentTimeMillis(); + // logger.trace(String.format("Generated query \"%s\" after %d ms", learnedQuery,end-start)); // logger.info(String.format("Learned queries for %d of %d questions.",successes,id2Question.size())); @@ -779,7 +775,7 @@ // int successfullTestThreadRuns = 0; /** */ - private static final String DBPEDIA_LIVE_ENDPOINT_URL_STRING = "http://live.dbpedia.org/sparql"; + // private static final String DBPEDIA_LIVE_ENDPOINT_URL_STRING = "http://live.dbpedia.org/sparql"; private static final Logger logger = Logger.getLogger(SPARQLTemplateBasedLearner3Test.class); @@ -806,7 +802,7 @@ Index propertiesIndex = new SOLRIndex("http://dbpedia.aksw.org:8080/solr/dbpedia_properties"); SOLRIndex boa_propertiesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/boa_fact_detail"); boa_propertiesIndex.setSortField("boa-score"); -// propertiesIndex = new HierarchicalIndex(boa_propertiesIndex, propertiesIndex); + // propertiesIndex = new HierarchicalIndex(boa_propertiesIndex, propertiesIndex); MappingBasedIndex mappingIndex= new MappingBasedIndex( SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("test/dbpedia_class_mappings.txt").getPath(), SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("test/dbpedia_resource_mappings.txt").getPath(), This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
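The key change in revision 3853 is the boolean dispatch between getWeightedSPARQLQueriesWithHMM and getWeightedSPARQLQueriesWithoutHMM. In the HMM branch (the copied file in the next message), each ranked URI path emitted by the hidden Markov model is turned into one concrete query by substituting the template's variables, with the path probability carried along as the query score. The following is a simplified sketch of that expansion step with hypothetical names; it assumes variables and path elements are already aligned, which the original code explicitly marks as an open TODO.

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

public class HmmPathExpansionSketch {

    /** A scored candidate: the instantiated SPARQL string plus the HMM path probability. */
    public static final class WeightedQuery {
        public final String sparql;
        public final double score;
        WeightedQuery(String sparql, double score) {
            this.sparql = sparql;
            this.score = score;
        }
    }

    /**
     * @param template    a SPARQL string containing placeholder variables, e.g. "?v0", "?v1"
     * @param variables   the placeholders, in the order the HMM emits path elements
     * @param rankedPaths path probability -> URI sequences that generate the observation
     */
    public static List<WeightedQuery> expand(String template, List<String> variables,
                                             TreeMap<Double, List<List<String>>> rankedPaths) {
        List<WeightedQuery> candidates = new ArrayList<WeightedQuery>();
        // iterate from the most to the least probable path
        for (Map.Entry<Double, List<List<String>>> entry : rankedPaths.descendingMap().entrySet()) {
            for (List<String> path : entry.getValue()) {
                String query = template;
                for (int i = 0; i < variables.size(); i++) {
                    // NOTE: plain replace() assumes no variable name is a prefix of
                    // another (?v1 vs ?v10); DL-Learner's Query.replaceVarWithURI
                    // instead rewrites its parsed query object.
                    query = query.replace(variables.get(i), "<" + path.get(i) + ">");
                }
                candidates.add(new WeightedQuery(query, entry.getKey()));
            }
        }
        return candidates;
    }
}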
From: <ki...@us...> - 2012-09-27 13:18:18
Revision: 3852 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3852&view=rev Author: kirdie Date: 2012-09-27 13:18:05 +0000 (Thu, 27 Sep 2012) Log Message: ----------- last commit merged the two sparqltemplatedbased2learner's into one file, this one renamed the file to the correct name. Added Paths: ----------- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java Removed Paths: ------------- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2HMM.java Copied: branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java (from rev 3851, branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2HMM.java) =================================================================== --- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java (rev 0) +++ branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-09-27 13:18:05 UTC (rev 3852) @@ -0,0 +1,1440 @@ +package org.dllearner.algorithm.tbsl.learning; + +import hmm.HiddenMarkovModel; +import hmm.ResourceInfo; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import java.util.SortedMap; +import java.util.SortedSet; +import java.util.TreeMap; +import java.util.TreeSet; +import org.apache.commons.collections15.MultiMap; +import org.apache.log4j.Logger; +import org.dllearner.algorithm.tbsl.nlp.Lemmatizer; +import org.dllearner.algorithm.tbsl.nlp.LingPipeLemmatizer; +import org.dllearner.algorithm.tbsl.nlp.PartOfSpeechTagger; +import org.dllearner.algorithm.tbsl.nlp.PlingStemmer; +import org.dllearner.algorithm.tbsl.nlp.StanfordPartOfSpeechTagger; +import org.dllearner.algorithm.tbsl.nlp.WordNet; +import org.dllearner.algorithm.tbsl.sparql.Allocation; +import org.dllearner.algorithm.tbsl.sparql.Query; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_Filter; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_Pair; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_PairType; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_Property; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_QueryType; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_Triple; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_Value; +import org.dllearner.algorithm.tbsl.sparql.Slot; +import org.dllearner.algorithm.tbsl.sparql.SlotType; +import org.dllearner.algorithm.tbsl.sparql.Template; +import org.dllearner.algorithm.tbsl.sparql.WeightedQuery; +import org.dllearner.algorithm.tbsl.templator.Templator; +import org.dllearner.algorithm.tbsl.util.Knowledgebase; +import org.dllearner.algorithm.tbsl.util.PopularityMap; +import org.dllearner.algorithm.tbsl.util.PopularityMap.EntityType; +import org.dllearner.algorithm.tbsl.util.Similarity; +import org.dllearner.algorithm.tbsl.util.UnknownPropertyHelper.SymPropertyDirection; +import org.dllearner.common.index.Index; +import org.dllearner.common.index.IndexResultItem; +import org.dllearner.common.index.IndexResultSet; +import org.dllearner.common.index.MappingBasedIndex; +import org.dllearner.common.index.SOLRIndex; +import 
org.dllearner.common.index.SPARQLDatatypePropertiesIndex; +import org.dllearner.common.index.SPARQLIndex; +import org.dllearner.common.index.SPARQLObjectPropertiesIndex; +import org.dllearner.common.index.SPARQLPropertiesIndex; +import org.dllearner.common.index.VirtuosoDatatypePropertiesIndex; +import org.dllearner.common.index.VirtuosoObjectPropertiesIndex; +import org.dllearner.common.index.VirtuosoPropertiesIndex; +import org.dllearner.core.ComponentInitException; +import org.dllearner.core.LearningProblem; +import org.dllearner.core.SparqlQueryLearningAlgorithm; +import org.dllearner.core.owl.Description; +import org.dllearner.core.owl.NamedClass; +import org.dllearner.core.owl.ObjectProperty; +import org.dllearner.core.owl.Thing; +import org.dllearner.kb.LocalModelBasedSparqlEndpointKS; +import org.dllearner.kb.SparqlEndpointKS; +import org.dllearner.kb.sparql.ExtractionDBCache; +import org.dllearner.kb.sparql.SparqlEndpoint; +import org.dllearner.kb.sparql.SparqlQuery; +import org.dllearner.reasoning.SPARQLReasoner; +import org.ini4j.InvalidFileFormatException; +import org.ini4j.Options; +import org.semanticweb.owlapi.model.IRI; +import org.semanticweb.owlapi.util.SimpleIRIShortFormProvider; +import com.hp.hpl.jena.ontology.OntModelSpec; +import com.hp.hpl.jena.query.QueryExecutionFactory; +import com.hp.hpl.jena.query.QueryFactory; +import com.hp.hpl.jena.query.QuerySolution; +import com.hp.hpl.jena.query.ResultSet; +import com.hp.hpl.jena.query.Syntax; +import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.rdf.model.ModelFactory; +import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP; +import com.hp.hpl.jena.sparql.expr.ExprAggregator; +import com.hp.hpl.jena.sparql.expr.ExprVar; +import com.hp.hpl.jena.sparql.expr.aggregate.AggCount; +import com.hp.hpl.jena.sparql.expr.aggregate.Aggregator; +import com.jamonapi.Monitor; +import com.jamonapi.MonitorFactory; + +/** The old learner taken over by Konrad Höffner for experiments with the Hidden Markov Algorithm by Saedeeh Shekarpur. + * + * */ +public class SPARQLTemplateBasedLearner2 implements SparqlQueryLearningAlgorithm +{ + public static boolean useHMM = true; + + enum Mode {BEST_QUERY, BEST_NON_EMPTY_QUERY} + private Mode mode = Mode.BEST_QUERY; + + /** used to create a label out of the URI when there is no label available in the SPARQL endpoint.*/ + private static SimpleIRIShortFormProvider sfp = new SimpleIRIShortFormProvider(); + + private static final Logger logger = Logger.getLogger(SPARQLTemplateBasedLearner2.class); + /** synonyms are great but are not used yet by the HMM algorithm. **/ + private static final boolean CREATE_SYNONYMS = false; + /** The minimum score of items that are accepted from the Sindice search BOA index. 
**/ + private static final Double BOA_THRESHOLD = 0.9; + private Monitor templateMon = MonitorFactory.getTimeMonitor("template"); + private Monitor sparqlMon = MonitorFactory.getTimeMonitor("sparql"); + + private boolean useRemoteEndpointValidation; + private boolean stopIfQueryResultNotEmpty; + private int maxTestedQueriesPerTemplate = 50; + private int maxQueryExecutionTimeInSeconds; + private int maxTestedQueries = 200; + private int maxIndexResults; + + private SparqlEndpoint endpoint = null; + private Model model = null; + + private ExtractionDBCache cache = new ExtractionDBCache("cache"); + + private Index resourcesIndex; + private Index classesIndex; + private Index propertiesIndex; + + private Index datatypePropertiesIndex; + private Index objectPropertiesIndex; + + private MappingBasedIndex mappingIndex; + + private Templator templateGenerator = null; + private Lemmatizer lemmatizer; + private PartOfSpeechTagger posTagger; + private WordNet wordNet; + + private String question; + private int learnedPos = -1; + + private Set<Template> templates; + private Map<Template, Collection<? extends Query>> template2Queries; + private Map<Slot, List<String>> slot2URI; + + private Collection<WeightedQuery> sparqlQueryCandidates; + private SortedSet<WeightedQuery> learnedSPARQLQueries; + private SortedSet<WeightedQuery> generatedQueries; + + private SPARQLReasoner reasoner; + + private String currentlyExecutedQuery; + + private boolean dropZeroScoredQueries = true; + private boolean useManualMappingsIfExistOnly = true; + + private boolean multiThreaded = true; + + private String [] grammarFiles = new String[]{"tbsl/lexicon/english.lex"}; + + private PopularityMap popularityMap; + + private Set<String> relevantKeywords; + + private boolean useDomainRangeRestriction = true; + + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex){ + this(endpoint, resourcesIndex, classesIndex, propertiesIndex, new StanfordPartOfSpeechTagger()); + } + + public SPARQLTemplateBasedLearner2(Knowledgebase knowledgebase, PartOfSpeechTagger posTagger, WordNet wordNet, Options options){ + this(knowledgebase.getEndpoint(), knowledgebase.getResourceIndex(), knowledgebase.getClassIndex(),knowledgebase.getPropertyIndex(), posTagger, wordNet, options); + } + + public SPARQLTemplateBasedLearner2(Knowledgebase knowledgebase){ + this(knowledgebase.getEndpoint(), knowledgebase.getResourceIndex(), knowledgebase.getClassIndex(),knowledgebase.getPropertyIndex(), new StanfordPartOfSpeechTagger(), new WordNet(), new Options()); + } + + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index index){ + this(endpoint, index, new StanfordPartOfSpeechTagger()); + } + + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger){ + this(endpoint, resourcesIndex, classesIndex, propertiesIndex, posTagger, new WordNet(), new Options()); + } + + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index index, PartOfSpeechTagger posTagger){ + this(endpoint, index, posTagger, new WordNet(), new Options()); + } + + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, WordNet wordNet){ + this(endpoint, resourcesIndex, classesIndex, propertiesIndex, new StanfordPartOfSpeechTagger(), wordNet, new Options()); + } + + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index index, WordNet 
wordNet){ + this(endpoint, index, new StanfordPartOfSpeechTagger(), wordNet, new Options()); + } + + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet){ + this(endpoint, resourcesIndex, classesIndex, propertiesIndex, posTagger, wordNet, new Options(), new ExtractionDBCache("cache")); + } + + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index index, PartOfSpeechTagger posTagger, WordNet wordNet){ + this(endpoint, index, index, index, posTagger, wordNet, new Options(), new ExtractionDBCache("cache")); + } + + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet, Options options){ + this(endpoint, resourcesIndex, classesIndex, propertiesIndex, posTagger, wordNet, options, new ExtractionDBCache("cache")); + } + + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index index, PartOfSpeechTagger posTagger, WordNet wordNet, Options options){ + this(endpoint, index, index, index, posTagger, wordNet, options, new ExtractionDBCache("cache")); + } + + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet, Options options, ExtractionDBCache cache){ + this.endpoint = endpoint; + this.resourcesIndex = resourcesIndex; + this.classesIndex = classesIndex; + this.propertiesIndex = propertiesIndex; + this.posTagger = posTagger; + this.wordNet = wordNet; + this.cache = cache; + + setOptions(options); + + if(propertiesIndex instanceof SPARQLPropertiesIndex){ + if(propertiesIndex instanceof VirtuosoPropertiesIndex){ + datatypePropertiesIndex = new VirtuosoDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + objectPropertiesIndex = new VirtuosoObjectPropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + } else { + datatypePropertiesIndex = new SPARQLDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + objectPropertiesIndex = new SPARQLObjectPropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + } + } else { + datatypePropertiesIndex = propertiesIndex; + objectPropertiesIndex = propertiesIndex; + } + reasoner = new SPARQLReasoner(new SparqlEndpointKS(endpoint), cache); + } + + public SPARQLTemplateBasedLearner2(Model model, Index resourcesIndex, Index classesIndex, Index propertiesIndex){ + this(model, resourcesIndex, classesIndex, propertiesIndex, new StanfordPartOfSpeechTagger()); + } + + public SPARQLTemplateBasedLearner2(Model model, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger){ + this(model, resourcesIndex, classesIndex, propertiesIndex, posTagger, new WordNet(), new Options()); + } + + public SPARQLTemplateBasedLearner2(Model model, Index resourcesIndex, Index classesIndex, Index propertiesIndex, WordNet wordNet){ + this(model, resourcesIndex, classesIndex, propertiesIndex, new StanfordPartOfSpeechTagger(), wordNet, new Options()); + } + + public SPARQLTemplateBasedLearner2(Model model, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet, Options options){ + this(model, resourcesIndex, classesIndex, propertiesIndex, posTagger, wordNet, options, new ExtractionDBCache("cache")); + } + + public SPARQLTemplateBasedLearner2(Model model, MappingBasedIndex mappingBasedIndex, PartOfSpeechTagger 
posTagger) + { + this(model, new SPARQLIndex(model),new SPARQLIndex(model),new SPARQLIndex(model),posTagger); + setMappingIndex(mappingBasedIndex); + } + + public SPARQLTemplateBasedLearner2(Model model, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet, Options options, ExtractionDBCache cache){ + this.model = model; + this.resourcesIndex = resourcesIndex; + this.classesIndex = classesIndex; + this.propertiesIndex = propertiesIndex; + this.posTagger = posTagger; + this.wordNet = wordNet; + this.cache = cache; + + setOptions(options); + + if(propertiesIndex instanceof SPARQLPropertiesIndex){ + if(propertiesIndex instanceof VirtuosoPropertiesIndex){ + datatypePropertiesIndex = new VirtuosoDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + objectPropertiesIndex = new VirtuosoObjectPropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + } else { + datatypePropertiesIndex = new SPARQLDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + objectPropertiesIndex = new SPARQLObjectPropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + } + } else { + datatypePropertiesIndex = propertiesIndex; + objectPropertiesIndex = propertiesIndex; + } + reasoner = new SPARQLReasoner(new LocalModelBasedSparqlEndpointKS(ModelFactory.createOntologyModel(OntModelSpec.RDFS_MEM, model)), cache); + } + + public void setGrammarFiles(String[] grammarFiles) + { + if(templateGenerator==null) {throw new AssertionError("Learner not initialized. Please call init();");} + templateGenerator.setGrammarFiles(grammarFiles); + } + + @Override + public void init() throws ComponentInitException { + templateGenerator = new Templator(posTagger, wordNet, grammarFiles); + lemmatizer = new LingPipeLemmatizer(); + } + + public void setMappingIndex(MappingBasedIndex mappingIndex) { + this.mappingIndex = mappingIndex; + } + + public void setCache(ExtractionDBCache cache) { + this.cache = cache; + } + + public void setKnowledgebase(Knowledgebase knowledgebase){ + this.endpoint = knowledgebase.getEndpoint(); + this.resourcesIndex = knowledgebase.getResourceIndex(); + this.classesIndex = knowledgebase.getClassIndex(); + this.propertiesIndex = knowledgebase.getPropertyIndex(); + this.mappingIndex = knowledgebase.getMappingIndex(); + if(propertiesIndex instanceof SPARQLPropertiesIndex){ + if(propertiesIndex instanceof VirtuosoPropertiesIndex){ + datatypePropertiesIndex = new VirtuosoDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + objectPropertiesIndex = new VirtuosoObjectPropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + } else { + datatypePropertiesIndex = new SPARQLDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + objectPropertiesIndex = new SPARQLObjectPropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + } + } else { + datatypePropertiesIndex = propertiesIndex; + objectPropertiesIndex = propertiesIndex; + } + reasoner = new SPARQLReasoner(new SparqlEndpointKS(endpoint)); + } + + public void setUseDomainRangeRestriction(boolean useDomainRangeRestriction) { + this.useDomainRangeRestriction = useDomainRangeRestriction; + } + + /* + * Only for Evaluation useful. 
+ */ + public void setUseIdealTagger(boolean value){ + templateGenerator.setUNTAGGED_INPUT(!value); + } + + private void setOptions(Options options){ + maxIndexResults = Integer.parseInt(options.get("solr.query.limit", "10")); + + maxQueryExecutionTimeInSeconds = Integer.parseInt(options.get("sparql.query.maxExecutionTimeInSeconds", "20")); + cache.setMaxExecutionTimeInSeconds(maxQueryExecutionTimeInSeconds); + + useRemoteEndpointValidation = options.get("learning.validationType", "remote").equals("remote") ? true : false; + stopIfQueryResultNotEmpty = Boolean.parseBoolean(options.get("learning.stopAfterFirstNonEmptyQueryResult", "true")); + maxTestedQueriesPerTemplate = Integer.parseInt(options.get("learning.maxTestedQueriesPerTemplate", "20")); + + String wordnetPath = options.get("wordnet.dictionary", "tbsl/dict"); + wordnetPath = this.getClass().getClassLoader().getResource(wordnetPath).getPath(); + System.setProperty("wordnet.database.dir", wordnetPath); + } + + public void setEndpoint(SparqlEndpoint endpoint){ + this.endpoint = endpoint; + + reasoner = new SPARQLReasoner(new SparqlEndpointKS(endpoint)); + reasoner.setCache(cache); + reasoner.prepareSubsumptionHierarchy(); + } + + public void setQuestion(String question){ + this.question = question; + } + + public void setUseRemoteEndpointValidation(boolean useRemoteEndpointValidation){ + this.useRemoteEndpointValidation = useRemoteEndpointValidation; + } + + public int getMaxQueryExecutionTimeInSeconds() { + return maxQueryExecutionTimeInSeconds; + } + + public void setMaxQueryExecutionTimeInSeconds(int maxQueryExecutionTimeInSeconds) { + this.maxQueryExecutionTimeInSeconds = maxQueryExecutionTimeInSeconds; + } + + public int getMaxTestedQueriesPerTemplate() { + return maxTestedQueriesPerTemplate; + } + + public void setMaxTestedQueriesPerTemplate(int maxTestedQueriesPerTemplate) { + this.maxTestedQueriesPerTemplate = maxTestedQueriesPerTemplate; + } + + private void reset(){ + learnedSPARQLQueries = new TreeSet<WeightedQuery>(); + template2Queries = new HashMap<Template, Collection<? 
extends Query>>(); + slot2URI = new HashMap<Slot, List<String>>(); + relevantKeywords = new HashSet<String>(); + currentlyExecutedQuery = null; + + // templateMon.reset(); + // sparqlMon.reset(); + } + + public void learnSPARQLQueries() throws NoTemplateFoundException{ + reset(); + //generate SPARQL query templates + logger.debug("Generating SPARQL query templates..."); + templateMon.start(); + if(multiThreaded){ + templates = templateGenerator.buildTemplatesMultiThreaded(question,CREATE_SYNONYMS); + } else { + templates = templateGenerator.buildTemplates(question); + } + templateMon.stop(); + logger.debug("Done in " + templateMon.getLastValue() + "ms."); + relevantKeywords.addAll(templateGenerator.getUnknownWords()); + if(templates.isEmpty()){ + throw new NoTemplateFoundException(); + + } + logger.debug("Templates:"); + for(Template t : templates){ + logger.debug(t); + } + + //get the weighted query candidates + generatedQueries = getWeightedSPARQLQueries(templates); + sparqlQueryCandidates = new ArrayList<WeightedQuery>(); + int i = 0; + for(WeightedQuery wQ : generatedQueries){ + logger.debug(wQ.explain()); + sparqlQueryCandidates.add(wQ); + if(i == maxTestedQueries){ + break; + } + i++; + } + + if(mode == Mode.BEST_QUERY){ + double bestScore = -1; + for(WeightedQuery candidate : generatedQueries){ + double score = candidate.getScore(); + if(score >= bestScore){ + bestScore = score; + learnedSPARQLQueries.add(candidate); + } else { + break; + } + } + } else if(mode == Mode.BEST_NON_EMPTY_QUERY){ + //test candidates + if(useRemoteEndpointValidation){ //on remote endpoint + validateAgainstRemoteEndpoint(sparqlQueryCandidates); + } else {//on local model + + } + } + } + + public SortedSet<WeightedQuery> getGeneratedQueries() { + return generatedQueries; + } + + public SortedSet<WeightedQuery> getGeneratedQueries(int topN) { + SortedSet<WeightedQuery> topNQueries = new TreeSet<WeightedQuery>(); + int max = Math.min(topN, generatedQueries.size()); + for(WeightedQuery wQ : generatedQueries){ + topNQueries.add(wQ); + if(topNQueries.size() == max){ + break; + } + } + return topNQueries; + } + + public Set<Template> getTemplates(){ + return templates; + } + + public List<String> getGeneratedSPARQLQueries(){ + List<String> queries = new ArrayList<String>(); + for(WeightedQuery wQ : sparqlQueryCandidates){ + queries.add(wQ.getQuery().toString()); + } + + return queries; + } + + public Map<Template, Collection<? 
extends Query>> getTemplates2SPARQLQueries(){ + return template2Queries; + } + + public Map<Slot, List<String>> getSlot2URIs(){ + return slot2URI; + } + + private void normProminenceValues(Set<Allocation> allocations){ + double min = 0; + double max = 0; + for(Allocation a : allocations){ + if(a.getProminence() < min){ + min = a.getProminence(); + } + if(a.getProminence() > max){ + max = a.getProminence(); + } + } + if(min==max) {return;} + for(Allocation a : allocations){ + double prominence = a.getProminence()/(max-min); + a.setProminence(prominence); + } + } + + private void computeScore(Set<Allocation> allocations){ + double alpha = 0.8; + double beta = 1 - alpha; + + for(Allocation a : allocations){ + double score = alpha * a.getSimilarity() + beta * a.getProminence(); + a.setScore(score); + } + + } + + public Set<String> getRelevantKeywords(){ + return relevantKeywords; + } + + // just for testing the HMM integration, getWeightedSPARQLQueriesOld is the original one + private SortedSet<WeightedQuery> getWeightedSPARQLQueries(Set<Template> templates) + { + // for testing + for(Template template: templates) + { + { + ArrayList<String> keywords = new ArrayList<String>(); + for(Slot slot: template.getSlots()) + { + keywords.add(slot.getWords().get(0)); + } + if(template.getSlots().size()!=3) {continue;} +// if(!keywords.contains("Mean Hamster Software")) {continue;} +// if(!keywords.contains("published")) {continue;} + System.out.println("\"keywords\": "+keywords); + } + System.out.println(template); + SortedSet<WeightedQuery> queries = new TreeSet<WeightedQuery>(); + Query query = template.getQuery(); + double score = 0; + + Map<List<String>,List<ResourceInfo>> segmentToURIs = new HashMap<List<String>,List<ResourceInfo>>(); + Map<String,IndexResultItem> uriUniqueToResultItem = new HashMap<String,IndexResultItem>(); + for(Slot slot: template.getSlots()) + { + List<String> segment = new LinkedList<String>(); + segment.addAll(Arrays.asList(slot.getWords().get(0).split("\\s"))); + List<ResourceInfo> resourceInfos = new LinkedList<ResourceInfo>(); + + for(IndexResultItem item : getIndexResultItems(slot)) + { + // if this gets used at another place, create a function IndexResultItemToResourceInfo() + ResourceInfo info = new ResourceInfo(); + info.setUri(item.getUri()); + String label = item.getLabel(); + // in dbpedia, the last part of the uri is transformed from the english label, reverse the transformation (should almost always work for dbpedia article resources) + info.setLabel(label!=null?label:sfp.getShortForm(IRI.create(item.getUri()))); + // in saedeehs algorithm, the emission probabilty is formed by the string similarity + // but we use the lucene index score + double max = 0; + for(String word: slot.getWords()) {max = Math.max(max, Similarity.getSimilarity(word, info.getLabel()));} + if(max<0||max>1) throw new AssertionError("max is not in [0,1], max="+max); + info.setStringSimilarityScore(max); + if(!info.setTypeFromDBpediaURI()) throw new AssertionError("could not set type for info "+info); + System.err.println("info with type: "+info); + resourceInfos.add(info); + } + segmentToURIs.put(segment,resourceInfos); + } + HiddenMarkovModel hmm = new HiddenMarkovModel(); + hmm.initialization(); + hmm.startMarkovModel(segmentToURIs,true); + MultiMap<Double,List<String>> paths = hmm.getPaths(); + + // System.out.println(hmm.getPaths()); + // die keywords jetzt in sadeehs algorithmus reinwerfen + // da kommen jetzt pfade raus mit unterschiedlichen wahrscheinlichkeiten + // HiddenMarkovModel 
HMM = new HiddenMarkovModel(); + // HMM.StartMarkovModel(); + // jetzt die variablen aus der query ersetzen mit den kandidaten + // ranked list der pfade, die die observation sequence generieren + + for(Double d : paths.keySet()) + { + for(List<String> path : paths.get(d)) + { + Query q = new Query(query); + // TODO: which variable stands for which resource? do it randomly now to check if the replacement works and then correct the order later + System.out.println(q.getVariablesAsStringList()); + System.out.println(); + int i = 0; + for(String var : q.getVariablesAsStringList()) + { + q.replaceVarWithURI(var, path.get(i)); + i++; + } + System.out.println(q); + + + WeightedQuery wQuery = new WeightedQuery(q, score); + queries.add(wQuery); + } + } + //System.exit(0); + return queries; + // >> SLOTS: + // y0: RESOURCE {Mean Hamster Software} + // p0: OBJECTPROPERTY {published,print} + // p1: CLASS {video games} + + + // System.out.println(template); + } + // + return null; + } + + private SortedSet<WeightedQuery> getWeightedSPARQLQueriesOld(Set<Template> templates){ + logger.debug("Generating SPARQL query candidates..."); + + Map<Slot, Set<Allocation>> slot2Allocations = new TreeMap<Slot, Set<Allocation>>(new Comparator<Slot>() { + + @Override + public int compare(Slot o1, Slot o2) { + if(o1.getSlotType() == o2.getSlotType()){ + return o1.getToken().compareTo(o2.getToken()); + } else { + return -1; + } + } + }); + slot2Allocations = Collections.synchronizedMap(new HashMap<Slot, Set<Allocation>>()); + + + SortedSet<WeightedQuery> allQueries = new TreeSet<WeightedQuery>(); + + Set<Allocation> allocations; + + for(Template t : templates){ + logger.info("Processing template:\n" + t.toString()); + allocations = new TreeSet<Allocation>(); + boolean containsRegex = t.getQuery().toString().toLowerCase().contains("(regex("); + + ExecutorService executor = Executors.newFixedThreadPool(t.getSlots().size()); + List<Future<Map<Slot, SortedSet<Allocation>>>> list = new ArrayList<Future<Map<Slot, SortedSet<Allocation>>>>(); + + long startTime = System.currentTimeMillis(); + + for (Slot slot : t.getSlots()) { + if(!slot2Allocations.containsKey(slot)){//System.out.println(slot + ": " + slot.hashCode());System.out.println(slot2Allocations); + Callable<Map<Slot, SortedSet<Allocation>>> worker = new SlotProcessor(slot); + Future<Map<Slot, SortedSet<Allocation>>> submit = executor.submit(worker); + list.add(submit); + } + } + + for (Future<Map<Slot, SortedSet<Allocation>>> future : list) { + try { + Map<Slot, SortedSet<Allocation>> result = future.get(); + Entry<Slot, SortedSet<Allocation>> item = result.entrySet().iterator().next(); + slot2Allocations.put(item.getKey(), item.getValue()); + } catch (InterruptedException e) { + e.printStackTrace(); + } catch (ExecutionException e) { + e.printStackTrace(); + } + } + + executor.shutdown(); + + + /*for(Slot slot : t.getSlots()){ + allocations = slot2Allocations2.get(slot); + if(allocations == null){ + allocations = computeAllocations(slot, 10); + slot2Allocations2.put(slot, allocations); + } + slot2Allocations.put(slot, allocations); + + //for tests add the property URI with http://dbpedia.org/property/ namespace + //TODO should be replaced by usage of a separate SOLR index + Set<Allocation> tmp = new HashSet<Allocation>(); + if(slot.getSlotType() == SlotType.PROPERTY || slot.getSlotType() == SlotType.SYMPROPERTY){ + for(Allocation a : allocations){ + String uri = "http://dbpedia.org/property/" + a.getUri().substring(a.getUri().lastIndexOf("/")+1); + Allocation newA = 
new Allocation(uri, a.getSimilarity(), a.getProminence()); + newA.setScore(a.getScore()-0.000001); + tmp.add(newA); + } + } + allocations.addAll(tmp); + }*/ + logger.debug("Time needed: " + (System.currentTimeMillis() - startTime) + "ms"); + + Set<WeightedQuery> queries = new HashSet<WeightedQuery>(); + Query cleanQuery = t.getQuery(); + queries.add(new WeightedQuery(cleanQuery)); + + Set<WeightedQuery> tmp = new TreeSet<WeightedQuery>(); + List<Slot> sortedSlots = new ArrayList<Slot>(); + Set<Slot> classSlots = new HashSet<Slot>(); + for(Slot slot : t.getSlots()){ + if(slot.getSlotType() == SlotType.CLASS){ + sortedSlots.add(slot); + classSlots.add(slot); + } + } + for(Slot slot : t.getSlots()){ + if(slot.getSlotType() == SlotType.PROPERTY || slot.getSlotType() == SlotType.OBJECTPROPERTY || slot.getSlotType() == SlotType.DATATYPEPROPERTY){ + sortedSlots.add(slot); + } + } + for(Slot slot : t.getSlots()){ + if(!sortedSlots.contains(slot)){ + sortedSlots.add(slot); + } + } + //add for each SYMPROPERTY Slot the reversed query + for(Slot slot : sortedSlots){ + for(WeightedQuery wQ : queries){ + if(slot.getSlotType() == SlotType.SYMPROPERTY || slot.getSlotType() == SlotType.OBJECTPROPERTY){ + Query reversedQuery = new Query(wQ.getQuery()); + reversedQuery.getTriplesWithVar(slot.getAnchor()).iterator().next().reverse(); + tmp.add(new WeightedQuery(reversedQuery)); + } + tmp.add(wQ); + } + queries.clear(); + queries.addAll(tmp); + tmp.clear(); + } + + for(Slot slot : sortedSlots){ + if(!slot2Allocations.get(slot).isEmpty()){ + for(Allocation a : slot2Allocations.get(slot)){ + for(WeightedQuery query : queries){ + Query q = new Query(query.getQuery()); + + boolean drop = false; + if(useDomainRangeRestriction){ + if(slot.getSlotType() == SlotType.PROPERTY || slot.getSlotType() == SlotType.SYMPROPERTY){ + for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ + String objectVar = triple.getValue().getName(); + String subjectVar = triple.getVariable().getName(); + // System.out.println(triple); + for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(objectVar)){ + // System.out.println(typeTriple); + if(true){//reasoner.isObjectProperty(a.getUri())){ + Description range = reasoner.getRange(new ObjectProperty(a.getUri())); + // System.out.println(a); + if(range != null){ + Set<Description> allRanges = new HashSet<Description>(); + SortedSet<Description> superClasses; + if(range instanceof NamedClass){ + superClasses = reasoner.getSuperClasses(range); + allRanges.addAll(superClasses); + } else { + for(Description nc : range.getChildren()){ + superClasses = reasoner.getSuperClasses(nc); + allRanges.addAll(superClasses); + } + } + allRanges.add(range); + allRanges.remove(new NamedClass(Thing.instance.getURI())); + + Set<Description> allTypes = new HashSet<Description>(); + String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); + Description type = new NamedClass(typeURI); + superClasses = reasoner.getSuperClasses(type); + allTypes.addAll(superClasses); + allTypes.add(type); + + if(!org.mindswap.pellet.utils.SetUtils.intersects(allRanges, allTypes)){ + drop = true; + } + } + } else { + drop = true; + } + + } + for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(subjectVar)){ + Description domain = reasoner.getDomain(new ObjectProperty(a.getUri())); + // System.out.println(a); + if(domain != null){ + Set<Description> allDomains = new HashSet<Description>(); + SortedSet<Description> superClasses; + if(domain instanceof NamedClass){ + superClasses = 
reasoner.getSuperClasses(domain); + allDomains.addAll(superClasses); + } else { + for(Description nc : domain.getChildren()){ + superClasses = reasoner.getSuperClasses(nc); + allDomains.addAll(superClasses); + } + } + allDomains.add(domain); + allDomains.remove(new NamedClass(Thing.instance.getURI())); + + Set<Description> allTypes = new HashSet<Description>(); + String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); + Description type = new NamedClass(typeURI); + superClasses = reasoner.getSuperClasses(type); + allTypes.addAll(superClasses); + allTypes.add(type); + + if(!org.mindswap.pellet.utils.SetUtils.intersects(allDomains, allTypes)){ + drop = true; + } else { + + } + } + } + } + } + } + + if(!drop){ + if(slot.getSlotType() == SlotType.RESOURCE){//avoid queries where predicate is data property and object resource->add REGEX filter in this case + for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ + SPARQL_Value object = triple.getValue(); + if(object.isVariable() && object.getName().equals(slot.getAnchor())){//only consider triple where SLOT is in object position + SPARQL_Property predicate = triple.getProperty(); + if(!predicate.isVariable()){//only consider triple where predicate is URI + String predicateURI = predicate.getName().replace("<", "").replace(">", ""); + if(isDatatypeProperty(predicateURI)){//if data property + q.addFilter(new SPARQL_Filter(new SPARQL_Pair( + object, "'" + slot.getWords().get(0) + "'", SPARQL_PairType.REGEX))); + } else { + q.replaceVarWithURI(slot.getAnchor(), a.getUri()); + } + } else { + q.replaceVarWithURI(slot.getAnchor(), a.getUri()); + } + } else { + q.replaceVarWithURI(slot.getAnchor(), a.getUri()); + } + } + } else { + q.replaceVarWithURI(slot.getAnchor(), a.getUri()); + } + WeightedQuery w = new WeightedQuery(q); + double newScore = query.getScore() + a.getScore(); + w.setScore(newScore); + w.addAllocations(query.getAllocations()); + w.addAllocation(a); + tmp.add(w); + } + + + } + } + //lower queries with FILTER-REGEX + if(containsRegex){ + for(WeightedQuery wQ : tmp){ + wQ.setScore(wQ.getScore() - 0.01); + } + } + + queries.clear(); + queries.addAll(tmp);//System.out.println(tmp); + tmp.clear(); + } else {//Add REGEX FILTER if resource slot is empty and predicate is datatype property + if(slot.getSlotType() == SlotType.RESOURCE){ + for(WeightedQuery query : queries){ + Query q = query.getQuery(); + for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ + SPARQL_Value object = triple.getValue(); + if(object.isVariable() && object.getName().equals(slot.getAnchor())){//only consider triple where SLOT is in object position + SPARQL_Property predicate = triple.getProperty(); + if(!predicate.isVariable()){//only consider triple where predicate is URI + String predicateURI = predicate.getName().replace("<", "").replace(">", ""); + if(isDatatypeProperty(predicateURI)){//if data property + q.addFilter(new SPARQL_Filter(new SPARQL_Pair( + object, "'" + slot.getWords().get(0) + "'", SPARQL_PairType.REGEX))); + } + } + } + } + + } + + } else { + if(slot.getSlotType() == SlotType.SYMPROPERTY){ + for(WeightedQuery wQ : queries){ + List<SPARQL_Triple> triples = wQ.getQuery().getTriplesWithVar(slot.getAnchor()); + for(SPARQL_Triple triple : triples){ + String typeVar; + String resourceURI; + SymPropertyDirection direction; + if(triple.getValue().isVariable()){ + direction = SymPropertyDirection.VAR_RIGHT; + typeVar = triple.getValue().getName(); + resourceURI = 
triple.getVariable().getName(); + } else { + direction = SymPropertyDirection.VAR_LEFT; + typeVar = triple.getVariable().getName(); + resourceURI = triple.getValue().getName(); + } + resourceURI = resourceURI.replace("<", "").replace(">", ""); + List<SPARQL_Triple> typeTriples = wQ.getQuery().getRDFTypeTriples(typeVar); + for(SPARQL_Triple typeTriple : typeTriples){ + String typeURI = typeTriple.getValue().getName().replace("<", "").replace(">", ""); + // List<Entry<String, Integer>> mostFrequentProperties = UnknownPropertyHelper.getMostFrequentProperties(endpoint, cache, typeURI, resourceURI, direction); + // for(Entry<String, Integer> property : mostFrequentProperties){ + // wQ.getQuery().replaceVarWithURI(slot.getAnchor(), property.getKey()); + // wQ.setScore(wQ.getScore() + 0.1); + // } + } + + } + } + } + } + // else if(slot.getSlotType() == SlotType.CLASS){ + // String token = slot.getWords().get(0); + // if(slot.getToken().contains("house")){ + // String regexToken = token.replace("houses", "").replace("house", "").trim(); + // try { + // Map<Slot, SortedSet<Allocation>> ret = new SlotProcessor(new Slot(null, SlotType.CLASS, Collections.singletonList("house"))).call(); + // SortedSet<Allocation> alloc = ret.entrySet().iterator().next().getValue(); + // if(alloc != null && !alloc.isEmpty()){ + // String uri = alloc.first().getUri(); + // for(WeightedQuery query : queries){ + // Query q = query.getQuery(); + // for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ + // SPARQL_Term subject = triple.getVariable(); + // SPARQL_Term object = new SPARQL_Term("desc"); + // object.setIsVariable(true); + // object.setIsURI(false); + // q.addCondition(new SPARQL_Triple(subject, new SPARQL_Property("<http://purl.org/goodrelations/v1#description>"), object)); + // q.addFilter(new SPARQL_Filter(new SPARQL_Pair( + // object, "'" + regexToken + "'", SPARQL_PairType.REGEX))); + // } + // q.replaceVarWithURI(slot.getAnchor(), uri); + // + // } + // } + // } catch (Exception e) { + // e.printStackTrace(); + // } + // } + // } + + + } + + } + for (Iterator<WeightedQuery> iterator = queries.iterator(); iterator.hasNext();) { + WeightedQuery wQ = iterator.next(); + if(dropZeroScoredQueries){ + if(wQ.getScore() <= 0){ + iterator.remove(); + } + } else { + if(t.getSlots().size()==0) throw new AssertionError("no slots for query "+wQ); + wQ.setScore(wQ.getScore()/t.getSlots().size()); + } + + } + allQueries.addAll(queries); + List<Query> qList = new ArrayList<Query>(); + for(WeightedQuery wQ : queries){//System.err.println(wQ.getQuery()); + qList.add(wQ.getQuery()); + } + template2Queries.put(t, qList); + } + logger.debug("...done in "); + return allQueries; + } + + private double getProminenceValue(String uri, SlotType type){ + Integer popularity = null; + if(popularityMap != null){ + if(type == SlotType.CLASS){ + popularity = popularityMap.getPopularity(uri, EntityType.CLASS); + } else if(type == SlotType.PROPERTY || type == SlotType.SYMPROPERTY + || type == SlotType.DATATYPEPROPERTY || type == SlotType.OBJECTPROPERTY){ + popularity = popularityMap.getPopularity(uri, EntityType.PROPERTY); + } else if(type == SlotType.RESOURCE || type == SlotType.UNSPEC){ + popularity = popularityMap.getPopularity(uri, EntityType.RESOURCE); + } + } + if(popularity == null){ + String query = null; + if(type == SlotType.CLASS){ + query = "SELECT COUNT(?s) WHERE {?s a <%s>}"; + } else if(type == SlotType.PROPERTY || type == SlotType.SYMPROPERTY + || type == SlotType.DATATYPEPROPERTY || type == 
SlotType.OBJECTPROPERTY){ + query = "SELECT COUNT(*) WHERE {?s <%s> ?o}"; + } else if(type == SlotType.RESOURCE || type == SlotType.UNSPEC){ + query = "SELECT COUNT(*) WHERE {?s ?p <%s>}"; + } + query = String.format(query, uri); + + ResultSet rs = executeSelect(query); + QuerySolution qs; + String projectionVar; + while(rs.hasNext()){ + qs = rs.next(); + projectionVar = qs.varNames().next(); + popularity = qs.get(projectionVar).asLiteral().getInt(); + } + } + if(popularity == null){ + popularity = Integer.valueOf(0); + } + System.out.println(popularity); + + + // if(cnt == 0){ + // return 0; + // } + // return Math.log(cnt); + if(popularity!=popularity) {throw new AssertionError("prominence NaN for uri "+uri+", slot type "+type);} + return popularity; + } + + public void setPopularityMap(PopularityMap popularityMap) { + this.popularityMap = popularityMap; + } + + + private List<String> pruneList(List<String> words){ + List<String> prunedList = new ArrayList<String>(); + for(String w1 : words){ + boolean smallest = true; + for(String w2 : words){ + if(!w1.equals(w2)){ + if(w1.contains(w2)){ + smallest = false; + break; + } + } + } + if(smallest){ + prunedList.add(w1); + } + } + logger.info("Pruned list: " + prunedList); + // return getLemmatizedWords(words); + return prunedList; + } + + private List<String> getLemmatizedWords(List<String> words){ + logger.info("Pruning word list " + words + "..."); + // mon.start(); + List<String> pruned = new ArrayList<String>(); + for(String word : words){ + //currently only stem single words + if(word.contains(" ")){ + pruned.add(word); + } else { + String lemWord = lemmatizer.stem(word); + if(!pruned.contains(lemWord)){ + pruned.add(lemWord); + } + } + + } + // mon.stop(); + // logger.info("Done in " + mon.getLastValue() + "ms."); + logger.info("Pruned list: " + pruned); + return pruned; + } + + + private Index getIndexBySlotType(Slot slot){ + Index index = null; + SlotType type = slot.getSlotType(); + if(type == SlotType.CLASS){ + index = classesIndex; + } else if(type == SlotType.PROPERTY || type == SlotType.SYMPROPERTY){ + index = propertiesIndex; + } else if(type == SlotType.DATATYPEPROPERTY){ + index = datatypePropertiesIndex; + } else if(type == SlotType.OBJECTPROPERTY){ + index = objectPropertiesIndex; + } else if(type == SlotType.RESOURCE || type == SlotType.UNSPEC){ + index = resourcesIndex; + } + return index; + } + + private void validateAgainstRemoteEndpoint(Collection<WeightedQuery> queries){ + SPARQL_QueryType queryType = queries.iterator().next().getQuery().getQt(); + validate(queries, queryType); + } + + private void validate(Collection<WeightedQuery> queries, SPARQL_QueryType queryType){ + logger.debug("Testing candidate SPARQL queries on remote endpoint..."); + sparqlMon.start(); + if(queryType == SPARQL_QueryType.SELECT){ + for(WeightedQuery query : queries){ + learnedPos++; + List<String> results; + try { + logger.debug("Testing query:\n" + query); + com.hp.hpl.jena.query.Query q = QueryFactory.create(query.getQuery().toString(), Syntax.syntaxARQ); + q.setLimit(1); + ResultSet rs = executeSelect(q.toString()); + + results = new ArrayList<String>(); + QuerySolution qs; + String projectionVar; + while(rs.hasNext()){ + qs = rs.next(); + projectionVar = qs.varNames().next(); + if(qs.get(projectionVar).isLiteral()){ + results.add(qs.get(projectionVar).asLiteral().getLexicalForm()); + } else if(qs.get(projectionVar).isURIResource()){ + results.add(qs.get(projectionVar).asResource().getURI()); + } + + } + if(!results.isEmpty()){ + try{ + 
int cnt = Integer.parseInt(results.get(0)); + if(cnt > 0){ + learnedSPARQLQueries.add(query); + if(stopIfQueryResultNotEmpty){ + return; + } + } + } catch (NumberFormatException e){ + learnedSPARQLQueries.add(query); + if(stopIfQueryResultNotEmpty){ + return; + } + } + logger.debug("Result: " + results); + } + } catch (Exception e) { + e.printStackTrace(); + } + + } + } else if(queryType == SPARQL_QueryType.ASK){ + for(WeightedQuery query : queries){ + learnedPos++; + logger.debug("Testing query:\n" + query); + boolean result = executeAskQuery(query.getQuery().toString()); + learnedSPARQLQueries.add(query); + // if(stopIfQueryResultNotEmpty && result){ + // return; + // } + if(stopIfQueryResultNotEmpty){ + return; + } + logger.debug("Result: " + result); + } + } + + sparqlMon.stop(); + logger.debug("Done in " + sparqlMon.getLastValue() + "ms."); + } + + private boolean executeAskQuery(String query) + { + if(query==null) throw new NullPointerException("Parameter query == null"); + currentlyExecutedQuery = query; + + boolean ret; + if (model == null) + { + QueryEngineHTTP qe = new QueryEngineHTTP(endpoint.getURL().toString(), query); + qe.setDefaultGraphURIs(endpoint.getDefaultGraphURIs()); + ret = qe.execAsk(); + } + else {ret = QueryExecutionFactory.create(QueryFactory.create(query, Syntax.syntaxARQ), model).execAsk();} + return ret; + } + + private ResultSet executeSelect(String query) + { + if(query==null) throw new NullPointerException("Parameter query == null"); + currentlyExecutedQuery = query; + ResultSet rs; + if (model == null) { + if (cache == null) { + QueryEngineHTTP qe = new QueryEngineHTTP(endpoint.getURL().toString(), query); + qe.setDefaultGraphURIs(endpoint.getDefaultGraphURIs()); + rs = qe.execSelect(); + } else { + rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); + } + } else { + rs = QueryExecutionFactory.create(QueryFactory.create(query, Syntax.syntaxARQ), model) + .execSelect(); + } + + return rs; + } + + public String getCurrentlyExecutedQuery() { + return currentlyExecutedQuery; + } + + public int getLearnedPosition() { + if(learnedPos >= 0){ + return learnedPos+1; + } + return learnedPos; + } + + @Override + public void start() { + } + + @Override + public List<String> getCurrentlyBestSPARQLQueries(int nrOfSPARQLQueries) { + List<String> bestQueries = new ArrayList<String>(); + for(WeightedQuery wQ : learnedSPARQLQueries){ + bestQueries.add(wQ.getQuery().toString()); + } + return bestQueries; + } + + @Override + public String getBestSPARQLQuery() { + if(!learnedSPARQLQueries.isEmpty()){ + return learnedSPARQLQueries.iterator().next().getQuery().toString(); + } else { + return null; + } + } + + public SortedSet<WeightedQuery> getLearnedSPARQLQueries() { + return learnedSPARQLQueries; + } + + @Override + public LearningProblem getLearningProblem() { + // TODO Auto-generated method stub + return null; + } + + @Override + public void setLearningProblem(LearningProblem learningProblem) { + // TODO Auto-generated method stub + + } + + private Set<IndexResultItem> getIndexResultItems(Slot slot) + { + // List<String> uris = new LinkedList<String>(); + Set<IndexResultItem> indexResultItems = new HashSet<IndexResultItem>(); + + Index index = getIndexBySlotType(slot); + + for(String word : slot.getWords()) + { + IndexResultSet rs = new IndexResultSet(); + if(mappingIndex != null){ + SlotType type = slot.getSlotType(); + if(type == SlotType.CLASS){ + rs.add(mappingIndex.getClassesWithScores(word)); + } else if(type == SlotType.PROPERTY || type 
== SlotType.SYMPROPERTY){ + rs.add(mappingIndex.getPropertiesWithScores(word)); + } else if(type == SlotType.DATATYPEPROPERTY){ + rs.add(mappingIndex.getDatatypePropertiesWithScores(word)); + } else if(type == SlotType.OBJECTPROPERTY){ + rs.add(mappingIndex.getObjectPropertiesWithScores(word)); + } else if(type == SlotType.RESOURCE || type == SlotType.UNSPEC){ + rs.add(mappingIndex.getResourcesWithScores(word)); + } + } + //use the non manual indexes only if mapping based resultset is not empty and option is set + if(!useManualMappingsIfExistOnly || rs.isEmpty()){ + if(slot.getSlotType() == SlotType.RESOURCE){ + rs.add(index.getResourcesWithScores(word, 20,0)); + } else { + if(slot.getSlotType() == SlotType.CLASS){ + word = PlingStemmer.stem(word); + } + IndexResultSet tmp = index.getResourcesWithScores(word, 20,0,Collections.singleton("boa-score")); + for(IndexResultItem item : tmp.getItems()) + {System.out.println(item); + Double boaScore = (Double) item.getFields().get("boa-score"); + if(boaScore==null||boaScore>BOA_THRESHOLD) rs.addItem(item); + } + } + } + // for(IndexResultItem item: rs.getItems()) + // { + // uris.add(item.getUri()); + // } + indexResultItems.addAll(rs.getItems()); + } + return indexResultItems; + } + class SlotProcessor implements Callable<Map<Slot, SortedSet<Allocation>>>{ + + private Slot slot; + + public SlotProcessor(Slot slot) { + this.slot = slot; + } + + @Override + public Map<Slot, SortedSet<Allocation>> call() throws Exception { + Map<Slot, SortedSet<Allocation>> result = new HashMap<Slot, SortedSet<Allocation>>(); + result.put(slot, computeAllocations(slot)); + return result; + } + + private SortedSet<Allocation> computeAllocations(Slot slot){ + logger.debug("Computing allocations for slot: " + slot); + SortedSet<Allocation> allocations = new TreeSet<Allocation>(); + + Index index = getIndexBySlotType(slot); + + IndexResultSet rs; + for(String word : slot.getWords()){ + rs = new IndexResultSet(); + if(mappingIndex != null){ + SlotType type = slot.getSlotType(); + if(type == SlotType.CLASS){ + rs.add(mappingIndex.getClassesWithScores(word)); + } else if(type == SlotType.PROPERTY || type == SlotType.SYMPROPERTY){ + rs.add(mappingIndex.getPropertiesWithScores(word)); + } else if(type == SlotType.DATATYPEPROPERTY){ + rs.add(mappingIndex.getDatatypePropertiesWithScores(word)); + } else if(type == SlotType.OBJECTPROPERTY){ + rs.add(mappingIndex.getObjectPropertiesWithScores(word)); + } else if(type == SlotType.RESOURCE || type == SlotType.UNSPEC){ + rs.add(mappingIndex.getResourcesWithScores(word)); + } + } + //use the non manual indexes only if mapping based resultset is not empty and option is set + if(!useManualMappingsIfExistOnly || rs.isEmpty()){ + if(slot.getSlotType() == SlotType.RESOURCE){ + rs.add(index.getResourcesWithScores(word, 20)); + } else { + if(slot.getSlotType() == SlotType.CLASS){ + word = PlingStemmer.stem(word); + } + rs.add(index.getResourcesWithScores(word, 20)); + } + } + + + for(IndexResultItem item : rs.getItems()){ + double similarity = Similarity.getSimilarity(word, item.getLabel()); + // //get the labels of the redirects and compute the highest similarity + // if(slot.getSlotType() == SlotType.RESOURCE){ + // Set<String> labels = getRedirectLabels(item.getUri()); + // for(String label : labels){ + // double tmp = Similarity.getSimilarity(word, label); + // if(tmp > similarity){ + // similarity = tmp; + // } + // } + // } + double prominence = getProminenceValue(item.getUri(), slot.getSlotType()); + allocations.add(new 
Allocation(item.getUri(), prominence, similarity)); + } + + } + + normProminenceValues(allocations); + + computeScore(allocations); + logger.debug("Found " + allocations.size() + " allocations for slot " + slot); + return new TreeSet<Allocation>(allocations); + } + + private Index getIndexBySlotType(Slot slot){ + Index index = null; + SlotType type = slot.getSlotType(); + if(type == SlotType.CLASS){ + index = classesIndex; + } else if(type == SlotType.PROPERTY || type == SlotType.SYMPROPERTY){ + index = propertiesIndex; + } else if(type == SlotType.DATATYPEPROPERTY){ + index = datatypePropertiesIndex; + } else if(type == SlotType.OBJECTPROPERTY){ + index = objectPropertiesIndex; + } else if(type == SlotType.RESOURCE || type == SlotType.UNSPEC){ + index = resourcesIndex; + } + return index; + } + + } + + public String getTaggedInput() + { + if(templateGenerator==null) {throw new AssertionError("Learner not initialized. Please call init();");} + return templateGenerator.getTaggedInput(); + } + + private boolean isDatatypeProperty(String uri){ + Boolean isDatatypeProperty = null; + if(mappingIndex != null){ + isDatatypeProperty = mappingIndex.isDataProperty(uri); + } + if(isDatatypeProperty == null){ + String query = String.format("ASK {<%s> a <http://www.w3.org/2002/07/owl#DatatypeProperty> .}", uri); + isDatatypeProperty = executeAskQuery(query); + } + return isDatatypeProperty; + } + + // /** + // * @param args + // * @throws NoTemplateFoundException + // * @throws IOException + // * @throws FileNotFoundException + // * @throws InvalidFileFormatException + // */ + // public static void main(String[] args) throws Exception { + // SparqlEndpoint endpoint = new SparqlEndpoint(new URL("http://greententacle.techfak.uni-bielefeld.de:5171/sparql"), + // Collections.<String>singletonList(""), Collections.<String>emptyList()); + // Index resourcesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_resources"); + // Index classesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_classes"); + // Index propertiesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_properties"); + // + // SPARQLTemplateBasedLearner2 learner = new SPARQLTemplateBasedLearner2HMM(endpoint, resourcesIndex, classesIndex, propertiesIndex); + // learner.init(); + // + // String question = "What is the highest mountain?"; + // + // learner.setQuestion(question); + // learner.learnSPARQLQueries(); + // System.out.println("Learned query:\n" + learner.getBestSPARQLQuery()); + // System.out.println("Lexical answer type is: " + learner.getTemplates().iterator().next().getLexicalAnswerType()); + // System.out.println(learner.getLearnedPosition()); + // + // } + + + +} Deleted: branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2HMM.java =================================================================== --- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2HMM.java 2012-09-27 13:16:49 UTC (rev 3851) +++ branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2HMM.java 2012-09-27 13:18:05 UTC (rev 3852) @@ -1,1440 +0,0 @@ -package org.dllearner.algorithm.tbsl.learning; - -import hmm.HiddenMarkovModel; -import hmm.ResourceInfo; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.HashSet; -import 
java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Set; -import java.util.SortedMap; -import java.util.SortedSet; -import java.util.TreeMap; -import java.util.TreeSet; -import org.apache.commons.collections15.MultiMap; -import org.apache.log4j.Logger; -import org.dllearner.algorithm.tbsl.nlp.Lemmatizer; -import org.dllearner.algorithm.tbsl.nlp.LingPipeLemmatizer; -import org.dllearner.algorithm.tbsl.nlp.PartOfSpeechTagger; -import org.dllearner.algorithm.tbsl.nlp.PlingStemmer; -import org.dllearner.algorithm.tbsl.nlp.StanfordPartOfSpeechTagger; -import org.dllearner.algorithm.tbsl.nlp.WordNet; -import org.dllearner.algorithm.tbsl.sparql.Allocation; -import org.dllearner.algorithm.tbsl.sparql.Query; -import org.dllearner.algorithm.tbsl.sparql.SPARQL_Filter; -import org.dllearner.algorithm.tbsl.sparql.SPARQL_Pair; -import org.dllearner.algorithm.tbsl.sparql.SPARQL_PairType; -import org.dllearner.algorithm.tbsl.sparql.SPARQL_Property; -import org.dllearner.algorithm.tbsl.sparql.SPARQL_QueryType; -import org.dllearner.algorithm.tbsl.sparql.SPARQL_Triple; -import org.dllearner.algorithm.tbsl.sparql.SPARQL_Value; -import org.dllearner.algorithm.tbsl.sparql.Slot; -import org.dllearner.algorithm.tbsl.sparql.SlotType; -import org.dllearner.algorithm.tbsl.sparql.Template; -import org.dllearner.algorithm.tbsl.sparql.WeightedQuery; -import org.dllearner.algorithm.tbsl.templator.Templator; -import org.dllearner.algorithm.tbsl.util.Knowledgebase; -import org.dllearner.algorithm.tbsl.util.PopularityMap; -import org.dllearner.algorithm.tbsl.util.PopularityMap.EntityType; -import org.dllearner.algorithm.tbsl.util.Similarity; -import org.dllearner.algorithm.tbsl.util.UnknownPropertyHelper.SymPropertyDirection; -import org.dllearner.common.index.Index; -import org.dllearner.common.index.IndexResultItem; -import org.dllearner.common.index.IndexResultSet; -import org.dllearner.common.index.MappingBasedIndex; -import org.dllearner.common.index.SOLRIndex; -import org.dllearner.common.index.SPARQLDatatypePropertiesIndex; -import org.dllearner.common.index.SPARQLIndex; -import org.dllearner.common.index.SPARQLObjectPropertiesIndex; -import org.dllearner.common.index.SPARQLPropertiesIndex; -import org.dllearner.common.index.VirtuosoDatatypePropertiesIndex; -import org.dllearner.common.index.VirtuosoObjectPropertiesIndex; -import org.dllearner.common.index.VirtuosoPropertiesIndex; -import org.dllearner.core.ComponentInitException; -import org.dllearner.core.LearningProblem; -import org.dllearner.core.SparqlQueryLearningAlgorithm; -import org.dllearner.core.owl.Description; -import org.dllearner.core.owl.NamedClass; -import org.dllearner.core.owl.ObjectProperty; -import org.dllearner.core.owl.Thing; -import org.dllearner.kb.LocalModelBasedSparqlEndpointKS; -import org.dllearner.kb.SparqlEndpointKS; -import org.dllearner.kb.sparql.ExtractionDBCache; -import org.dllearner.kb.sparql.SparqlEndpoint; -import org.dllearner.kb.sparql.SparqlQuery; -import org.dllearner.reasoning.SPARQLReasoner; -import org.ini4j.InvalidFileFormatException; -import org.ini4j.Options; -import org.semanticweb.owlapi.model.IRI; -import org.semanticweb.owlapi.util.SimpleIRIShortFormProvider; -import com.hp.hpl.jena.ontology.OntModelSpec; -import com.hp.hpl.jena.query.QueryExecutionFactory; -import com.hp.hpl.jena.query.QueryFactory; -import com.hp.hpl.jena.query.QuerySolution; -import com.hp.hpl.jena.query.ResultSet; -import com.hp.hpl.jena.query.Syntax; -import 
com.hp.hpl.jena.rdf.model.Model; -import com.hp.hpl.jena.rdf.model.ModelFactory; -import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP; -import com.hp.hpl.jena.sparql.expr.ExprAggregator; -import com.hp.hpl.jena.s... [truncated message content]
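The scoring scheme running through the learner code above works as follows: each candidate URI (an Allocation) for a slot carries a string-similarity value and a prominence value (a popularity count obtained via the SPARQL COUNT queries in getProminenceValue), the prominence values are min-max-normalised, and both are combined into a weighted score (alpha = 0.8 in the revision shown in this archive). The following is a minimal, self-contained Java sketch of just that computation; the Allocation class here is a simplified stand-in for org.dllearner.algorithm.tbsl.sparql.Allocation, and the example URIs and numbers are made up for illustration.

    import java.util.ArrayList;
    import java.util.List;

    // Simplified stand-in for org.dllearner.algorithm.tbsl.sparql.Allocation.
    class Allocation {
        final String uri;
        double similarity;  // string similarity between a keyword and the entity label
        double prominence;  // popularity, e.g. a triple count from a SPARQL COUNT query
        double score;

        Allocation(String uri, double similarity, double prominence) {
            this.uri = uri;
            this.similarity = similarity;
            this.prominence = prominence;
        }
    }

    public class AllocationScoringSketch {

        // Mirrors normProminenceValues(...) above: each prominence is divided by
        // the value range; note that the original divides by max-min without
        // shifting by min, and skips normalisation when all values are equal.
        static void normProminenceValues(List<Allocation> allocations) {
            double min = 0;
            double max = 0;
            for (Allocation a : allocations) {
                if (a.prominence < min) min = a.prominence;
                if (a.prominence > max) max = a.prominence;
            }
            if (min == max) return;
            for (Allocation a : allocations) {
                a.prominence = a.prominence / (max - min);
            }
        }

        // Mirrors computeScore(...) above: weighted sum with alpha = 0.8.
        static void computeScore(List<Allocation> allocations) {
            double alpha = 0.8;
            double beta = 1 - alpha;
            for (Allocation a : allocations) {
                a.score = alpha * a.similarity + beta * a.prominence;
            }
        }

        public static void main(String[] args) {
            List<Allocation> allocations = new ArrayList<Allocation>();
            // Hypothetical candidates for the slot word "video games".
            allocations.add(new Allocation("http://dbpedia.org/ontology/VideoGame", 0.9, 12000));
            allocations.add(new Allocation("http://dbpedia.org/ontology/Game", 0.6, 98000));
            normProminenceValues(allocations);
            computeScore(allocations);
            for (Allocation a : allocations) {
                System.out.println(a.uri + " -> " + a.score);
            }
        }
    }

With these illustrative numbers the high-similarity candidate still wins (0.8*0.9 + 0.2*12000/98000 is roughly 0.74, versus 0.8*0.6 + 0.2*1.0 = 0.68), which is the intended effect of weighting similarity four times as strongly as prominence.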
From: <ki...@us...> - 2012-09-27 13:17:02
Revision: 3851 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3851&view=rev Author: kirdie Date: 2012-09-27 13:16:49 +0000 (Thu, 27 Sep 2012) Log Message: ----------- Added Paths: ----------- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2HMM.java Removed Paths: ------------- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java Deleted: branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-09-27 09:39:55 UTC (rev 3850) +++ branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-09-27 13:16:49 UTC (rev 3851) @@ -1,972 +0,0 @@ -package org.dllearner.algorithm.tbsl.learning; - -import hmm.HiddenMarkovModel; -import hmm.ResourceInfo; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.SortedMap; -import java.util.SortedSet; -import java.util.TreeMap; -import java.util.TreeSet; -import org.apache.commons.collections15.MultiMap; -import org.apache.log4j.Logger; -import org.dllearner.algorithm.tbsl.nlp.Lemmatizer; -import org.dllearner.algorithm.tbsl.nlp.LingPipeLemmatizer; -import org.dllearner.algorithm.tbsl.nlp.PartOfSpeechTagger; -import org.dllearner.algorithm.tbsl.nlp.PlingStemmer; -import org.dllearner.algorithm.tbsl.nlp.StanfordPartOfSpeechTagger; -import org.dllearner.algorithm.tbsl.nlp.WordNet; -import org.dllearner.algorithm.tbsl.sparql.Allocation; -import org.dllearner.algorithm.tbsl.sparql.Query; -import org.dllearner.algorithm.tbsl.sparql.SPARQL_QueryType; -import org.dllearner.algorithm.tbsl.sparql.Slot; -import org.dllearner.algorithm.tbsl.sparql.SlotType; -import org.dllearner.algorithm.tbsl.sparql.Template; -import org.dllearner.algorithm.tbsl.sparql.WeightedQuery; -import org.dllearner.algorithm.tbsl.templator.Templator; -import org.dllearner.algorithm.tbsl.util.Knowledgebase; -import org.dllearner.algorithm.tbsl.util.PopularityMap; -import org.dllearner.algorithm.tbsl.util.PopularityMap.EntityType; -import org.dllearner.algorithm.tbsl.util.Similarity; -import org.dllearner.common.index.Index; -import org.dllearner.common.index.IndexResultItem; -import org.dllearner.common.index.IndexResultSet; -import org.dllearner.common.index.MappingBasedIndex; -import org.dllearner.common.index.SPARQLDatatypePropertiesIndex; -import org.dllearner.common.index.SPARQLIndex; -import org.dllearner.common.index.SPARQLObjectPropertiesIndex; -import org.dllearner.common.index.SPARQLPropertiesIndex; -import org.dllearner.common.index.VirtuosoDatatypePropertiesIndex; -import org.dllearner.common.index.VirtuosoObjectPropertiesIndex; -import org.dllearner.common.index.VirtuosoPropertiesIndex; -import org.dllearner.core.ComponentInitException; -import org.dllearner.core.LearningProblem; -import org.dllearner.core.SparqlQueryLearningAlgorithm; -import org.dllearner.kb.LocalModelBasedSparqlEndpointKS; -import org.dllearner.kb.SparqlEndpointKS; -import org.dllearner.kb.sparql.ExtractionDBCache; -import org.dllearner.kb.sparql.SparqlEndpoint; 
-import org.dllearner.kb.sparql.SparqlQuery; -import org.dllearner.reasoning.SPARQLReasoner; -import org.ini4j.Options; -import org.semanticweb.owlapi.model.IRI; -import org.semanticweb.owlapi.util.SimpleIRIShortFormProvider; -import com.hp.hpl.jena.ontology.OntModelSpec; -import com.hp.hpl.jena.query.QueryExecutionFactory; -import com.hp.hpl.jena.query.QueryFactory; -import com.hp.hpl.jena.query.QuerySolution; -import com.hp.hpl.jena.query.ResultSet; -import com.hp.hpl.jena.query.Syntax; -import com.hp.hpl.jena.rdf.model.Model; -import com.hp.hpl.jena.rdf.model.ModelFactory; -import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP; -import com.jamonapi.Monitor; -import com.jamonapi.MonitorFactory; - -/** The old learner taken over by Konrad Höffner for experiments with the Hidden Markov Algorithm by Saedeeh Shekarpur. - * - * */ -public class SPARQLTemplateBasedLearner2 implements SparqlQueryLearningAlgorithm -{ - enum Mode {BEST_QUERY, BEST_NON_EMPTY_QUERY} - private Mode mode = Mode.BEST_QUERY; - - /** used to create a label out of the URI when there is no label available in the SPARQL endpoint.*/ - private static SimpleIRIShortFormProvider sfp = new SimpleIRIShortFormProvider(); - - private static final Logger logger = Logger.getLogger(SPARQLTemplateBasedLearner2.class); - /** synonyms are great but are not used yet by the HMM algorithm. **/ - private static final boolean CREATE_SYNONYMS = false; - /** The minimum score of items that are accepted from the Sindice search BOA index. **/ - private static final Double BOA_THRESHOLD = 0.9; - private Monitor templateMon = MonitorFactory.getTimeMonitor("template"); - private Monitor sparqlMon = MonitorFactory.getTimeMonitor("sparql"); - - private boolean useRemoteEndpointValidation; - private boolean stopIfQueryResultNotEmpty; - private int maxTestedQueriesPerTemplate = 50; - private int maxQueryExecutionTimeInSeconds; - private int maxTestedQueries = 200; - private int maxIndexResults; - - private SparqlEndpoint endpoint = null; - private Model model = null; - - private ExtractionDBCache cache = new ExtractionDBCache("cache"); - - private Index resourcesIndex; - private Index classesIndex; - private Index propertiesIndex; - - private Index datatypePropertiesIndex; - private Index objectPropertiesIndex; - - private MappingBasedIndex mappingIndex; - - private Templator templateGenerator = null; - private Lemmatizer lemmatizer; - private PartOfSpeechTagger posTagger; - private WordNet wordNet; - - private String question; - private int learnedPos = -1; - - private Set<Template> templates; - private Map<Template, Collection<? 
extends Query>> template2Queries; - private Map<Slot, List<String>> slot2URI; - - private Collection<WeightedQuery> sparqlQueryCandidates; - private SortedSet<WeightedQuery> learnedSPARQLQueries; - private SortedSet<WeightedQuery> generatedQueries; - - private SPARQLReasoner reasoner; - - private String currentlyExecutedQuery; - - private boolean dropZeroScoredQueries = true; - private boolean useManualMappingsIfExistOnly = true; - - private boolean multiThreaded = true; - - private String [] grammarFiles = new String[]{"tbsl/lexicon/english.lex"}; - - private PopularityMap popularityMap; - - private Set<String> relevantKeywords; - - private boolean useDomainRangeRestriction = true; - - public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex){ - this(endpoint, resourcesIndex, classesIndex, propertiesIndex, new StanfordPartOfSpeechTagger()); - } - - public SPARQLTemplateBasedLearner2(Knowledgebase knowledgebase, PartOfSpeechTagger posTagger, WordNet wordNet, Options options){ - this(knowledgebase.getEndpoint(), knowledgebase.getResourceIndex(), knowledgebase.getClassIndex(),knowledgebase.getPropertyIndex(), posTagger, wordNet, options); - } - - public SPARQLTemplateBasedLearner2(Knowledgebase knowledgebase){ - this(knowledgebase.getEndpoint(), knowledgebase.getResourceIndex(), knowledgebase.getClassIndex(),knowledgebase.getPropertyIndex(), new StanfordPartOfSpeechTagger(), new WordNet(), new Options()); - } - - public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index index){ - this(endpoint, index, new StanfordPartOfSpeechTagger()); - } - - public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger){ - this(endpoint, resourcesIndex, classesIndex, propertiesIndex, posTagger, new WordNet(), new Options()); - } - - public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index index, PartOfSpeechTagger posTagger){ - this(endpoint, index, posTagger, new WordNet(), new Options()); - } - - public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, WordNet wordNet){ - this(endpoint, resourcesIndex, classesIndex, propertiesIndex, new StanfordPartOfSpeechTagger(), wordNet, new Options()); - } - - public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index index, WordNet wordNet){ - this(endpoint, index, new StanfordPartOfSpeechTagger(), wordNet, new Options()); - } - - public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet){ - this(endpoint, resourcesIndex, classesIndex, propertiesIndex, posTagger, wordNet, new Options(), new ExtractionDBCache("cache")); - } - - public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index index, PartOfSpeechTagger posTagger, WordNet wordNet){ - this(endpoint, index, index, index, posTagger, wordNet, new Options(), new ExtractionDBCache("cache")); - } - - public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet, Options options){ - this(endpoint, resourcesIndex, classesIndex, propertiesIndex, posTagger, wordNet, options, new ExtractionDBCache("cache")); - } - - public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index index, PartOfSpeechTagger posTagger, WordNet wordNet, Options 
options){ - this(endpoint, index, index, index, posTagger, wordNet, options, new ExtractionDBCache("cache")); - } - - public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet, Options options, ExtractionDBCache cache){ - this.endpoint = endpoint; - this.resourcesIndex = resourcesIndex; - this.classesIndex = classesIndex; - this.propertiesIndex = propertiesIndex; - this.posTagger = posTagger; - this.wordNet = wordNet; - this.cache = cache; - - setOptions(options); - - if(propertiesIndex instanceof SPARQLPropertiesIndex){ - if(propertiesIndex instanceof VirtuosoPropertiesIndex){ - datatypePropertiesIndex = new VirtuosoDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); - objectPropertiesIndex = new VirtuosoObjectPropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); - } else { - datatypePropertiesIndex = new SPARQLDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); - objectPropertiesIndex = new SPARQLObjectPropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); - } - } else { - datatypePropertiesIndex = propertiesIndex; - objectPropertiesIndex = propertiesIndex; - } - reasoner = new SPARQLReasoner(new SparqlEndpointKS(endpoint), cache); - } - - public SPARQLTemplateBasedLearner2(Model model, Index resourcesIndex, Index classesIndex, Index propertiesIndex){ - this(model, resourcesIndex, classesIndex, propertiesIndex, new StanfordPartOfSpeechTagger()); - } - - public SPARQLTemplateBasedLearner2(Model model, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger){ - this(model, resourcesIndex, classesIndex, propertiesIndex, posTagger, new WordNet(), new Options()); - } - - public SPARQLTemplateBasedLearner2(Model model, Index resourcesIndex, Index classesIndex, Index propertiesIndex, WordNet wordNet){ - this(model, resourcesIndex, classesIndex, propertiesIndex, new StanfordPartOfSpeechTagger(), wordNet, new Options()); - } - - public SPARQLTemplateBasedLearner2(Model model, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet, Options options){ - this(model, resourcesIndex, classesIndex, propertiesIndex, posTagger, wordNet, options, new ExtractionDBCache("cache")); - } - - public SPARQLTemplateBasedLearner2(Model model, MappingBasedIndex mappingBasedIndex, PartOfSpeechTagger posTagger) - { - this(model, new SPARQLIndex(model),new SPARQLIndex(model),new SPARQLIndex(model),posTagger); - setMappingIndex(mappingBasedIndex); - } - - public SPARQLTemplateBasedLearner2(Model model, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet, Options options, ExtractionDBCache cache){ - this.model = model; - this.resourcesIndex = resourcesIndex; - this.classesIndex = classesIndex; - this.propertiesIndex = propertiesIndex; - this.posTagger = posTagger; - this.wordNet = wordNet; - this.cache = cache; - - setOptions(options); - - if(propertiesIndex instanceof SPARQLPropertiesIndex){ - if(propertiesIndex instanceof VirtuosoPropertiesIndex){ - datatypePropertiesIndex = new VirtuosoDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); - objectPropertiesIndex = new VirtuosoObjectPropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); - } else { - datatypePropertiesIndex = new SPARQLDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); - objectPropertiesIndex = new 
SPARQLObjectPropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); - } - } else { - datatypePropertiesIndex = propertiesIndex; - objectPropertiesIndex = propertiesIndex; - } - reasoner = new SPARQLReasoner(new LocalModelBasedSparqlEndpointKS(ModelFactory.createOntologyModel(OntModelSpec.RDFS_MEM, model)), cache); - } - - public void setGrammarFiles(String[] grammarFiles) - { - if(templateGenerator==null) {throw new AssertionError("Learner not initialized. Please call init();");} - templateGenerator.setGrammarFiles(grammarFiles); - } - - @Override - public void init() throws ComponentInitException { - templateGenerator = new Templator(posTagger, wordNet, grammarFiles); - lemmatizer = new LingPipeLemmatizer(); - } - - public void setMappingIndex(MappingBasedIndex mappingIndex) { - this.mappingIndex = mappingIndex; - } - - public void setCache(ExtractionDBCache cache) { - this.cache = cache; - } - - public void setKnowledgebase(Knowledgebase knowledgebase){ - this.endpoint = knowledgebase.getEndpoint(); - this.resourcesIndex = knowledgebase.getResourceIndex(); - this.classesIndex = knowledgebase.getClassIndex(); - this.propertiesIndex = knowledgebase.getPropertyIndex(); - this.mappingIndex = knowledgebase.getMappingIndex(); - if(propertiesIndex instanceof SPARQLPropertiesIndex){ - if(propertiesIndex instanceof VirtuosoPropertiesIndex){ - datatypePropertiesIndex = new VirtuosoDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); - objectPropertiesIndex = new VirtuosoObjectPropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); - } else { - datatypePropertiesIndex = new SPARQLDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); - objectPropertiesIndex = new SPARQLObjectPropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); - } - } else { - datatypePropertiesIndex = propertiesIndex; - objectPropertiesIndex = propertiesIndex; - } - reasoner = new SPARQLReasoner(new SparqlEndpointKS(endpoint)); - } - - public void setUseDomainRangeRestriction(boolean useDomainRangeRestriction) { - this.useDomainRangeRestriction = useDomainRangeRestriction; - } - - /* - * Only for Evaluation useful. - */ - public void setUseIdealTagger(boolean value){ - templateGenerator.setUNTAGGED_INPUT(!value); - } - - private void setOptions(Options options){ - maxIndexResults = Integer.parseInt(options.get("solr.query.limit", "10")); - - maxQueryExecutionTimeInSeconds = Integer.parseInt(options.get("sparql.query.maxExecutionTimeInSeconds", "20")); - cache.setMaxExecutionTimeInSeconds(maxQueryExecutionTimeInSeconds); - - useRemoteEndpointValidation = options.get("learning.validationType", "remote").equals("remote") ? 
true : false; - stopIfQueryResultNotEmpty = Boolean.parseBoolean(options.get("learning.stopAfterFirstNonEmptyQueryResult", "true")); - maxTestedQueriesPerTemplate = Integer.parseInt(options.get("learning.maxTestedQueriesPerTemplate", "20")); - - String wordnetPath = options.get("wordnet.dictionary", "tbsl/dict"); - wordnetPath = this.getClass().getClassLoader().getResource(wordnetPath).getPath(); - System.setProperty("wordnet.database.dir", wordnetPath); - } - - public void setEndpoint(SparqlEndpoint endpoint){ - this.endpoint = endpoint; - - reasoner = new SPARQLReasoner(new SparqlEndpointKS(endpoint)); - reasoner.setCache(cache); - reasoner.prepareSubsumptionHierarchy(); - } - - public void setQuestion(String question){ - this.question = question; - } - - public void setUseRemoteEndpointValidation(boolean useRemoteEndpointValidation){ - this.useRemoteEndpointValidation = useRemoteEndpointValidation; - } - - public int getMaxQueryExecutionTimeInSeconds() { - return maxQueryExecutionTimeInSeconds; - } - - public void setMaxQueryExecutionTimeInSeconds(int maxQueryExecutionTimeInSeconds) { - this.maxQueryExecutionTimeInSeconds = maxQueryExecutionTimeInSeconds; - } - - public int getMaxTestedQueriesPerTemplate() { - return maxTestedQueriesPerTemplate; - } - - public void setMaxTestedQueriesPerTemplate(int maxTestedQueriesPerTemplate) { - this.maxTestedQueriesPerTemplate = maxTestedQueriesPerTemplate; - } - - private void reset(){ - learnedSPARQLQueries = new TreeSet<WeightedQuery>(); - template2Queries = new HashMap<Template, Collection<? extends Query>>(); - slot2URI = new HashMap<Slot, List<String>>(); - relevantKeywords = new HashSet<String>(); - currentlyExecutedQuery = null; - - // templateMon.reset(); - // sparqlMon.reset(); - } - - public void learnSPARQLQueries() throws NoTemplateFoundException{ - reset(); - //generate SPARQL query templates - logger.debug("Generating SPARQL query templates..."); - templateMon.start(); - if(multiThreaded){ - templates = templateGenerator.buildTemplatesMultiThreaded(question,CREATE_SYNONYMS); - } else { - templates = templateGenerator.buildTemplates(question); - } - templateMon.stop(); - logger.debug("Done in " + templateMon.getLastValue() + "ms."); - relevantKeywords.addAll(templateGenerator.getUnknownWords()); - if(templates.isEmpty()){ - throw new NoTemplateFoundException(); - - } - logger.debug("Templates:"); - for(Template t : templates){ - logger.debug(t); - } - - //get the weighted query candidates - generatedQueries = getWeightedSPARQLQueries(templates); - sparqlQueryCandidates = new ArrayList<WeightedQuery>(); - int i = 0; - for(WeightedQuery wQ : generatedQueries){ - logger.debug(wQ.explain()); - sparqlQueryCandidates.add(wQ); - if(i == maxTestedQueries){ - break; - } - i++; - } - - if(mode == Mode.BEST_QUERY){ - double bestScore = -1; - for(WeightedQuery candidate : generatedQueries){ - double score = candidate.getScore(); - if(score >= bestScore){ - bestScore = score; - learnedSPARQLQueries.add(candidate); - } else { - break; - } - } - } else if(mode == Mode.BEST_NON_EMPTY_QUERY){ - //test candidates - if(useRemoteEndpointValidation){ //on remote endpoint - validateAgainstRemoteEndpoint(sparqlQueryCandidates); - } else {//on local model - - } - } - } - - public SortedSet<WeightedQuery> getGeneratedQueries() { - return generatedQueries; - } - - public SortedSet<WeightedQuery> getGeneratedQueries(int topN) { - SortedSet<WeightedQuery> topNQueries = new TreeSet<WeightedQuery>(); - int max = Math.min(topN, generatedQueries.size()); - 
for(WeightedQuery wQ : generatedQueries){ - topNQueries.add(wQ); - if(topNQueries.size() == max){ - break; - } - } - return topNQueries; - } - - public Set<Template> getTemplates(){ - return templates; - } - - public List<String> getGeneratedSPARQLQueries(){ - List<String> queries = new ArrayList<String>(); - for(WeightedQuery wQ : sparqlQueryCandidates){ - queries.add(wQ.getQuery().toString()); - } - - return queries; - } - - public Map<Template, Collection<? extends Query>> getTemplates2SPARQLQueries(){ - return template2Queries; - } - - public Map<Slot, List<String>> getSlot2URIs(){ - return slot2URI; - } - - private void normProminenceValues(Set<Allocation> allocations){ - double min = 0; - double max = 0; - for(Allocation a : allocations){ - if(a.getProminence() < min){ - min = a.getProminence(); - } - if(a.getProminence() > max){ - max = a.getProminence(); - } - } - if(min==max) {return;} - for(Allocation a : allocations){ - double prominence = a.getProminence()/(max-min); - a.setProminence(prominence); - } - } - - private void computeScore(Set<Allocation> allocations){ - double alpha = 0.8; - double beta = 1 - alpha; - - for(Allocation a : allocations){ - double score = alpha * a.getSimilarity() + beta * a.getProminence(); - a.setScore(score); - } - - } - - public Set<String> getRelevantKeywords(){ - return relevantKeywords; - } - - // just for testing the HMM integration, getWeightedSPARQLQueriesOld is the original one - private SortedSet<WeightedQuery> getWeightedSPARQLQueries(Set<Template> templates) - { - // for testing - for(Template template: templates) - { - { - ArrayList<String> keywords = new ArrayList<String>(); - for(Slot slot: template.getSlots()) - { - keywords.add(slot.getWords().get(0)); - } - if(template.getSlots().size()!=3) {continue;} -// if(!keywords.contains("Mean Hamster Software")) {continue;} -// if(!keywords.contains("published")) {continue;} - System.out.println("\"keywords\": "+keywords); - } - System.out.println(template); - SortedSet<WeightedQuery> queries = new TreeSet<WeightedQuery>(); - Query query = template.getQuery(); - double score = 0; - - Map<List<String>,List<ResourceInfo>> segmentToURIs = new HashMap<List<String>,List<ResourceInfo>>(); - Map<String,IndexResultItem> uriUniqueToResultItem = new HashMap<String,IndexResultItem>(); - for(Slot slot: template.getSlots()) - { - List<String> segment = new LinkedList<String>(); - segment.addAll(Arrays.asList(slot.getWords().get(0).split("\\s"))); - List<ResourceInfo> resourceInfos = new LinkedList<ResourceInfo>(); - - for(IndexResultItem item : getIndexResultItems(slot)) - { - // if this gets used at another place, create a function IndexResultItemToResourceInfo() - ResourceInfo info = new ResourceInfo(); - info.setUri(item.getUri()); - String label = item.getLabel(); - // in dbpedia, the last part of the uri is transformed from the english label, reverse the transformation (should almost always work for dbpedia article resources) - info.setLabel(label!=null?label:sfp.getShortForm(IRI.create(item.getUri()))); - // in saedeehs algorithm, the emission probabilty is formed by the string similarity - // but we use the lucene index score - double max = 0; - for(String word: slot.getWords()) {max = Math.max(max, Similarity.getSimilarity(word, info.getLabel()));} - if(max<0||max>1) throw new AssertionError("max is not in [0,1], max="+max); - info.setStringSimilarityScore(max); - if(!info.setTypeFromDBpediaURI()) throw new AssertionError("could not set type for info "+info); - System.err.println("info with 
type: "+info); - resourceInfos.add(info); - } - segmentToURIs.put(segment,resourceInfos); - } - HiddenMarkovModel hmm = new HiddenMarkovModel(); - hmm.initialization(); - hmm.startMarkovModel(segmentToURIs,true); - MultiMap<Double,List<String>> paths = hmm.getPaths(); - - // System.out.println(hmm.getPaths()); - // now feed the keywords into Saedeeh's algorithm - // this yields paths with different probabilities - // HiddenMarkovModel HMM = new HiddenMarkovModel(); - // HMM.StartMarkovModel(); - // now replace the variables in the query with the candidates - // ranked list of the paths that generate the observation sequence - - for(Double d : paths.keySet()) - { - for(List<String> path : paths.get(d)) - { - Query q = new Query(query); - // TODO: which variable stands for which resource? do it randomly now to check if the replacement works and then correct the order later - System.out.println(q.getVariablesAsStringList()); - System.out.println(); - int i = 0; - for(String var : q.getVariablesAsStringList()) - { - q.replaceVarWithURI(var, path.get(i)); - i++; - } - System.out.println(q); - - - WeightedQuery wQuery = new WeightedQuery(q, score); - queries.add(wQuery); - } - } - //System.exit(0); - return queries; - // >> SLOTS: - // y0: RESOURCE {Mean Hamster Software} - // p0: OBJECTPROPERTY {published,print} - // p1: CLASS {video games} - - - // System.out.println(template); - } - // - return null; - } - - private double getProminenceValue(String uri, SlotType type){ - Integer popularity = null; - if(popularityMap != null){ - if(type == SlotType.CLASS){ - popularity = popularityMap.getPopularity(uri, EntityType.CLASS); - } else if(type == SlotType.PROPERTY || type == SlotType.SYMPROPERTY - || type == SlotType.DATATYPEPROPERTY || type == SlotType.OBJECTPROPERTY){ - popularity = popularityMap.getPopularity(uri, EntityType.PROPERTY); - } else if(type == SlotType.RESOURCE || type == SlotType.UNSPEC){ - popularity = popularityMap.getPopularity(uri, EntityType.RESOURCE); - } - } - if(popularity == null){ - String query = null; - if(type == SlotType.CLASS){ - query = "SELECT COUNT(?s) WHERE {?s a <%s>}"; - } else if(type == SlotType.PROPERTY || type == SlotType.SYMPROPERTY - || type == SlotType.DATATYPEPROPERTY || type == SlotType.OBJECTPROPERTY){ - query = "SELECT COUNT(*) WHERE {?s <%s> ?o}"; - } else if(type == SlotType.RESOURCE || type == SlotType.UNSPEC){ - query = "SELECT COUNT(*) WHERE {?s ?p <%s>}"; - } - query = String.format(query, uri); - - ResultSet rs = executeSelect(query); - QuerySolution qs; - String projectionVar; - while(rs.hasNext()){ - qs = rs.next(); - projectionVar = qs.varNames().next(); - popularity = qs.get(projectionVar).asLiteral().getInt(); - } - } - if(popularity == null){ - popularity = Integer.valueOf(0); - } - System.out.println(popularity); - - - // if(cnt == 0){ - // return 0; - // } - // return Math.log(cnt); - if(popularity!=popularity) {throw new AssertionError("prominence NaN for uri "+uri+", slot type "+type);} - return popularity; - } - - public void setPopularityMap(PopularityMap popularityMap) { - this.popularityMap = popularityMap; - } - - - private List<String> pruneList(List<String> words){ - List<String> prunedList = new ArrayList<String>(); - for(String w1 : words){ - boolean smallest = true; - for(String w2 : words){ - if(!w1.equals(w2)){ - if(w1.contains(w2)){ - smallest = false; - break; - } - } - } - if(smallest){ - prunedList.add(w1); - } - } - logger.info("Pruned list: " + prunedList); - // return
getLemmatizedWords(words); - return prunedList; - } - - private List<String> getLemmatizedWords(List<String> words){ - logger.info("Pruning word list " + words + "..."); - // mon.start(); - List<String> pruned = new ArrayList<String>(); - for(String word : words){ - //currently only stem single words - if(word.contains(" ")){ - pruned.add(word); - } else { - String lemWord = lemmatizer.stem(word); - if(!pruned.contains(lemWord)){ - pruned.add(lemWord); - } - } - - } - // mon.stop(); - // logger.info("Done in " + mon.getLastValue() + "ms."); - logger.info("Pruned list: " + pruned); - return pruned; - } - - - private Index getIndexBySlotType(Slot slot){ - Index index = null; - SlotType type = slot.getSlotType(); - if(type == SlotType.CLASS){ - index = classesIndex; - } else if(type == SlotType.PROPERTY || type == SlotType.SYMPROPERTY){ - index = propertiesIndex; - } else if(type == SlotType.DATATYPEPROPERTY){ - index = datatypePropertiesIndex; - } else if(type == SlotType.OBJECTPROPERTY){ - index = objectPropertiesIndex; - } else if(type == SlotType.RESOURCE || type == SlotType.UNSPEC){ - index = resourcesIndex; - } - return index; - } - - private void validateAgainstRemoteEndpoint(Collection<WeightedQuery> queries){ - SPARQL_QueryType queryType = queries.iterator().next().getQuery().getQt(); - validate(queries, queryType); - } - - private void validate(Collection<WeightedQuery> queries, SPARQL_QueryType queryType){ - logger.debug("Testing candidate SPARQL queries on remote endpoint..."); - sparqlMon.start(); - if(queryType == SPARQL_QueryType.SELECT){ - for(WeightedQuery query : queries){ - learnedPos++; - List<String> results; - try { - logger.debug("Testing query:\n" + query); - com.hp.hpl.jena.query.Query q = QueryFactory.create(query.getQuery().toString(), Syntax.syntaxARQ); - q.setLimit(1); - ResultSet rs = executeSelect(q.toString()); - - results = new ArrayList<String>(); - QuerySolution qs; - String projectionVar; - while(rs.hasNext()){ - qs = rs.next(); - projectionVar = qs.varNames().next(); - if(qs.get(projectionVar).isLiteral()){ - results.add(qs.get(projectionVar).asLiteral().getLexicalForm()); - } else if(qs.get(projectionVar).isURIResource()){ - results.add(qs.get(projectionVar).asResource().getURI()); - } - - } - if(!results.isEmpty()){ - try{ - int cnt = Integer.parseInt(results.get(0)); - if(cnt > 0){ - learnedSPARQLQueries.add(query); - if(stopIfQueryResultNotEmpty){ - return; - } - } - } catch (NumberFormatException e){ - learnedSPARQLQueries.add(query); - if(stopIfQueryResultNotEmpty){ - return; - } - } - logger.debug("Result: " + results); - } - } catch (Exception e) { - e.printStackTrace(); - } - - } - } else if(queryType == SPARQL_QueryType.ASK){ - for(WeightedQuery query : queries){ - learnedPos++; - logger.debug("Testing query:\n" + query); - boolean result = executeAskQuery(query.getQuery().toString()); - learnedSPARQLQueries.add(query); - // if(stopIfQueryResultNotEmpty && result){ - // return; - // } - if(stopIfQueryResultNotEmpty){ - return; - } - logger.debug("Result: " + result); - } - } - - sparqlMon.stop(); - logger.debug("Done in " + sparqlMon.getLastValue() + "ms."); - } - - private boolean executeAskQuery(String query) - { - if(query==null) throw new NullPointerException("Parameter query == null"); - currentlyExecutedQuery = query; - - boolean ret; - if (model == null) - { - QueryEngineHTTP qe = new QueryEngineHTTP(endpoint.getURL().toString(), query); - qe.setDefaultGraphURIs(endpoint.getDefaultGraphURIs()); - ret = qe.execAsk(); - } - else {ret = 
QueryExecutionFactory.create(QueryFactory.create(query, Syntax.syntaxARQ), model).execAsk();} - return ret; - } - - private ResultSet executeSelect(String query) - { - if(query==null) throw new NullPointerException("Parameter query == null"); - currentlyExecutedQuery = query; - ResultSet rs; - if (model == null) { - if (cache == null) { - QueryEngineHTTP qe = new QueryEngineHTTP(endpoint.getURL().toString(), query); - qe.setDefaultGraphURIs(endpoint.getDefaultGraphURIs()); - rs = qe.execSelect(); - } else { - rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); - } - } else { - rs = QueryExecutionFactory.create(QueryFactory.create(query, Syntax.syntaxARQ), model) - .execSelect(); - } - - return rs; - } - - public String getCurrentlyExecutedQuery() { - return currentlyExecutedQuery; - } - - public int getLearnedPosition() { - if(learnedPos >= 0){ - return learnedPos+1; - } - return learnedPos; - } - - @Override - public void start() { - } - - @Override - public List<String> getCurrentlyBestSPARQLQueries(int nrOfSPARQLQueries) { - List<String> bestQueries = new ArrayList<String>(); - for(WeightedQuery wQ : learnedSPARQLQueries){ - bestQueries.add(wQ.getQuery().toString()); - } - return bestQueries; - } - - @Override - public String getBestSPARQLQuery() { - if(!learnedSPARQLQueries.isEmpty()){ - return learnedSPARQLQueries.iterator().next().getQuery().toString(); - } else { - return null; - } - } - - public SortedSet<WeightedQuery> getLearnedSPARQLQueries() { - return learnedSPARQLQueries; - } - - @Override - public LearningProblem getLearningProblem() { - // TODO Auto-generated method stub - return null; - } - - @Override - public void setLearningProblem(LearningProblem learningProblem) { - // TODO Auto-generated method stub - - } - - private Set<IndexResultItem> getIndexResultItems(Slot slot) - { - // List<String> uris = new LinkedList<String>(); - Set<IndexResultItem> indexResultItems = new HashSet<IndexResultItem>(); - - Index index = getIndexBySlotType(slot); - - for(String word : slot.getWords()) - { - IndexResultSet rs = new IndexResultSet(); - if(mappingIndex != null){ - SlotType type = slot.getSlotType(); - if(type == SlotType.CLASS){ - rs.add(mappingIndex.getClassesWithScores(word)); - } else if(type == SlotType.PROPERTY || type == SlotType.SYMPROPERTY){ - rs.add(mappingIndex.getPropertiesWithScores(word)); - } else if(type == SlotType.DATATYPEPROPERTY){ - rs.add(mappingIndex.getDatatypePropertiesWithScores(word)); - } else if(type == SlotType.OBJECTPROPERTY){ - rs.add(mappingIndex.getObjectPropertiesWithScores(word)); - } else if(type == SlotType.RESOURCE || type == SlotType.UNSPEC){ - rs.add(mappingIndex.getResourcesWithScores(word)); - } - } - //use the non manual indexes only if mapping based resultset is not empty and option is set - if(!useManualMappingsIfExistOnly || rs.isEmpty()){ - if(slot.getSlotType() == SlotType.RESOURCE){ - rs.add(index.getResourcesWithScores(word, 20,0)); - } else { - if(slot.getSlotType() == SlotType.CLASS){ - word = PlingStemmer.stem(word); - } - IndexResultSet tmp = index.getResourcesWithScores(word, 20,0,Collections.singleton("boa-score")); - for(IndexResultItem item : tmp.getItems()) - {System.out.println(item); - Double boaScore = (Double) item.getFields().get("boa-score"); - if(boaScore==null||boaScore>BOA_THRESHOLD) rs.addItem(item); - } - } - } - // for(IndexResultItem item: rs.getItems()) - // { - // uris.add(item.getUri()); - // } - indexResultItems.addAll(rs.getItems()); - } - return indexResultItems; - } - - 
- public String getTaggedInput() - { - if(templateGenerator==null) {throw new AssertionError("Learner not initialized. Please call init();");} - return templateGenerator.getTaggedInput(); - } - - private boolean isDatatypeProperty(String uri){ - Boolean isDatatypeProperty = null; - if(mappingIndex != null){ - isDatatypeProperty = mappingIndex.isDataProperty(uri); - } - if(isDatatypeProperty == null){ - String query = String.format("ASK {<%s> a <http://www.w3.org/2002/07/owl#DatatypeProperty> .}", uri); - isDatatypeProperty = executeAskQuery(query); - } - return isDatatypeProperty; - } - - // /** - // * @param args - // * @throws NoTemplateFoundException - // * @throws IOException - // * @throws FileNotFoundException - // * @throws InvalidFileFormatException - // */ - // public static void main(String[] args) throws Exception { - // SparqlEndpoint endpoint = new SparqlEndpoint(new URL("http://greententacle.techfak.uni-bielefeld.de:5171/sparql"), - // Collections.<String>singletonList(""), Collections.<String>emptyList()); - // Index resourcesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_resources"); - // Index classesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_classes"); - // Index propertiesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_properties"); - // - // SPARQLTemplateBasedLearner2 learner = new SPARQLTemplateBasedLearner2(endpoint, resourcesIndex, classesIndex, propertiesIndex); - // learner.init(); - // - // String question = "What is the highest mountain?"; - // - // learner.setQuestion(question); - // learner.learnSPARQLQueries(); - // System.out.println("Learned query:\n" + learner.getBestSPARQLQuery()); - // System.out.println("Lexical answer type is: " + learner.getTemplates().iterator().next().getLexicalAnswerType()); - // System.out.println(learner.getLearnedPosition()); - // - // } - - - -} Copied: branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2HMM.java (from rev 3849, branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java) =================================================================== --- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2HMM.java (rev 0) +++ branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2HMM.java 2012-09-27 13:16:49 UTC (rev 3851) @@ -0,0 +1,1440 @@ +package org.dllearner.algorithm.tbsl.learning; + +import hmm.HiddenMarkovModel; +import hmm.ResourceInfo; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import java.util.SortedMap; +import java.util.SortedSet; +import java.util.TreeMap; +import java.util.TreeSet; +import org.apache.commons.collections15.MultiMap; +import org.apache.log4j.Logger; +import org.dllearner.algorithm.tbsl.nlp.Lemmatizer; +import org.dllearner.algorithm.tbsl.nlp.LingPipeLemmatizer; +import org.dllearner.algorithm.tbsl.nlp.PartOfSpeechTagger; +import org.dllearner.algorithm.tbsl.nlp.PlingStemmer; +import org.dllearner.algorithm.tbsl.nlp.StanfordPartOfSpeechTagger; +import org.dllearner.algorithm.tbsl.nlp.WordNet; +import org.dllearner.algorithm.tbsl.sparql.Allocation; +import 
org.dllearner.algorithm.tbsl.sparql.Query; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_Filter; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_Pair; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_PairType; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_Property; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_QueryType; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_Triple; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_Value; +import org.dllearner.algorithm.tbsl.sparql.Slot; +import org.dllearner.algorithm.tbsl.sparql.SlotType; +import org.dllearner.algorithm.tbsl.sparql.Template; +import org.dllearner.algorithm.tbsl.sparql.WeightedQuery; +import org.dllearner.algorithm.tbsl.templator.Templator; +import org.dllearner.algorithm.tbsl.util.Knowledgebase; +import org.dllearner.algorithm.tbsl.util.PopularityMap; +import org.dllearner.algorithm.tbsl.util.PopularityMap.EntityType; +import org.dllearner.algorithm.tbsl.util.Similarity; +import org.dllearner.algorithm.tbsl.util.UnknownPropertyHelper.SymPropertyDirection; +import org.dllearner.common.index.Index; +import org.dllearner.common.index.IndexResultItem; +import org.dllearner.common.index.IndexResultSet; +import org.dllearner.common.index.MappingBasedIndex; +import org.dllearner.common.index.SOLRIndex; +import org.dllearner.common.index.SPARQLDatatypePropertiesIndex; +import org.dllearner.common.index.SPARQLIndex; +import org.dllearner.common.index.SPARQLObjectPropertiesIndex; +import org.dllearner.common.index.SPARQLPropertiesIndex; +import org.dllearner.common.index.VirtuosoDatatypePropertiesIndex; +import org.dllearner.common.index.VirtuosoObjectPropertiesIndex; +import org.dllearner.common.index.VirtuosoPropertiesIndex; +import org.dllearner.core.ComponentInitException; +import org.dllearner.core.LearningProblem; +import org.dllearner.core.SparqlQueryLearningAlgorithm; +import org.dllearner.core.owl.Description; +import org.dllearner.core.owl.NamedClass; +import org.dllearner.core.owl.ObjectProperty; +import org.dllearner.core.owl.Thing; +import org.dllearner.kb.LocalModelBasedSparqlEndpointKS; +import org.dllearner.kb.SparqlEndpointKS; +import org.dllearner.kb.sparql.ExtractionDBCache; +import org.dllearner.kb.sparql.SparqlEndpoint; +import org.dllearner.kb.sparql.SparqlQuery; +import org.dllearner.reasoning.SPARQLReasoner; +import org.ini4j.InvalidFileFormatException; +import org.ini4j.Options; +import org.semanticweb.owlapi.model.IRI; +import org.semanticweb.owlapi.util.SimpleIRIShortFormProvider; +import com.hp.hpl.jena.ontology.OntModelSpec; +import com.hp.hpl.jena.query.QueryExecutionFactory; +import com.hp.hpl.jena.query.QueryFactory; +import com.hp.hpl.jena.query.QuerySolution; +import com.hp.hpl.jena.query.ResultSet; +import com.hp.hpl.jena.query.Syntax; +import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.rdf.model.ModelFactory; +import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP; +import com.hp.hpl.jena.sparql.expr.ExprAggregator; +import com.hp.hpl.jena.sparql.expr.ExprVar; +import com.hp.hpl.jena.sparql.expr.aggregate.AggCount; +import com.hp.hpl.jena.sparql.expr.aggregate.Aggregator; +import com.jamonapi.Monitor; +import com.jamonapi.MonitorFactory; + +/** The old learner taken over by Konrad Höffner for experiments with the Hidden Markov Algorithm by Saedeeh Shekarpur. 
+ * + * */ +public class SPARQLTemplateBasedLearner2 implements SparqlQueryLearningAlgorithm +{ + public static boolean useHMM = true; + + enum Mode {BEST_QUERY, BEST_NON_EMPTY_QUERY} + private Mode mode = Mode.BEST_QUERY; + + /** used to create a label out of the URI when there is no label available in the SPARQL endpoint.*/ + private static SimpleIRIShortFormProvider sfp = new SimpleIRIShortFormProvider(); + + private static final Logger logger = Logger.getLogger(SPARQLTemplateBasedLearner2.class); + /** synonyms are great but are not used yet by the HMM algorithm. **/ + private static final boolean CREATE_SYNONYMS = false; + /** The minimum score of items that are accepted from the Sindice search BOA index. **/ + private static final Double BOA_THRESHOLD = 0.9; + private Monitor templateMon = MonitorFactory.getTimeMonitor("template"); + private Monitor sparqlMon = MonitorFactory.getTimeMonitor("sparql"); + + private boolean useRemoteEndpointValidation; + private boolean stopIfQueryResultNotEmpty; + private int maxTestedQueriesPerTemplate = 50; + private int maxQueryExecutionTimeInSeconds; + private int maxTestedQueries = 200; + private int maxIndexResults; + + private SparqlEndpoint endpoint = null; + private Model model = null; + + private ExtractionDBCache cache = new ExtractionDBCache("cache"); + + private Index resourcesIndex; + private Index classesIndex; + private Index propertiesIndex; + + private Index datatypePropertiesIndex; + private Index objectPropertiesIndex; + + private MappingBasedIndex mappingIndex; + + private Templator templateGenerator = null; + private Lemmatizer lemmatizer; + private PartOfSpeechTagger posTagger; + private WordNet wordNet; + + private String question; + private int learnedPos = -1; + + private Set<Template> templates; + private Map<Template, Collection<? 
extends Query>> template2Queries; + private Map<Slot, List<String>> slot2URI; + + private Collection<WeightedQuery> sparqlQueryCandidates; + private SortedSet<WeightedQuery> learnedSPARQLQueries; + private SortedSet<WeightedQuery> generatedQueries; + + private SPARQLReasoner reasoner; + + private String currentlyExecutedQuery; + + private boolean dropZeroScoredQueries = true; + private boolean useManualMappingsIfExistOnly = true; + + private boolean multiThreaded = true; + + private String [] grammarFiles = new String[]{"tbsl/lexicon/english.lex"}; + + private PopularityMap popularityMap; + + private Set<String> relevantKeywords; + + private boolean useDomainRangeRestriction = true; + + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex){ + this(endpoint, resourcesIndex, classesIndex, propertiesIndex, new StanfordPartOfSpeechTagger()); + } + + public SPARQLTemplateBasedLearner2(Knowledgebase knowledgebase, PartOfSpeechTagger posTagger, WordNet wordNet, Options options){ + this(knowledgebase.getEndpoint(), knowledgebase.getResourceIndex(), knowledgebase.getClassIndex(),knowledgebase.getPropertyIndex(), posTagger, wordNet, options); + } + + public SPARQLTemplateBasedLearner2(Knowledgebase knowledgebase){ + this(knowledgebase.getEndpoint(), knowledgebase.getResourceIndex(), knowledgebase.getClassIndex(),knowledgebase.getPropertyIndex(), new StanfordPartOfSpeechTagger(), new WordNet(), new Options()); + } + + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index index){ + this(endpoint, index, new StanfordPartOfSpeechTagger()); + } + + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger){ + this(endpoint, resourcesIndex, classesIndex, propertiesIndex, posTagger, new WordNet(), new Options()); + } + + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index index, PartOfSpeechTagger posTagger){ + this(endpoint, index, posTagger, new WordNet(), new Options()); + } + + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, WordNet wordNet){ + this(endpoint, resourcesIndex, classesIndex, propertiesIndex, new StanfordPartOfSpeechTagger(), wordNet, new Options()); + } + + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index index, WordNet wordNet){ + this(endpoint, index, new StanfordPartOfSpeechTagger(), wordNet, new Options()); + } + + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet){ + this(endpoint, resourcesIndex, classesIndex, propertiesIndex, posTagger, wordNet, new Options(), new ExtractionDBCache("cache")); + } + + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index index, PartOfSpeechTagger posTagger, WordNet wordNet){ + this(endpoint, index, index, index, posTagger, wordNet, new Options(), new ExtractionDBCache("cache")); + } + + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet, Options options){ + this(endpoint, resourcesIndex, classesIndex, propertiesIndex, posTagger, wordNet, options, new ExtractionDBCache("cache")); + } + + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index index, PartOfSpeechTagger posTagger, WordNet wordNet, Options 
options){ + this(endpoint, index, index, index, posTagger, wordNet, options, new ExtractionDBCache("cache")); + } + + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet, Options options, ExtractionDBCache cache){ + this.endpoint = endpoint; + this.resourcesIndex = resourcesIndex; + this.classesIndex = classesIndex; + this.propertiesIndex = propertiesIndex; + this.posTagger = posTagger; + this.wordNet = wordNet; + this.cache = cache; + + setOptions(options); + + if(propertiesIndex instanceof SPARQLPropertiesIndex){ + if(propertiesIndex instanceof VirtuosoPropertiesIndex){ + datatypePropertiesIndex = new VirtuosoDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + objectPropertiesIndex = new VirtuosoObjectPropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + } else { + datatypePropertiesIndex = new SPARQLDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + objectPropertiesIndex = new SPARQLObjectPropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + } + } else { + datatypePropertiesIndex = propertiesIndex; + objectPropertiesIndex = propertiesIndex; + } + reasoner = new SPARQLReasoner(new SparqlEndpointKS(endpoint), cache); + } + + public SPARQLTemplateBasedLearner2(Model model, Index resourcesIndex, Index classesIndex, Index propertiesIndex){ + this(model, resourcesIndex, classesIndex, propertiesIndex, new StanfordPartOfSpeechTagger()); + } + + public SPARQLTemplateBasedLearner2(Model model, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger){ + this(model, resourcesIndex, classesIndex, propertiesIndex, posTagger, new WordNet(), new Options()); + } + + public SPARQLTemplateBasedLearner2(Model model, Index resourcesIndex, Index classesIndex, Index propertiesIndex, WordNet wordNet){ + this(model, resourcesIndex, classesIndex, propertiesIndex, new StanfordPartOfSpeechTagger(), wordNet, new Options()); + } + + public SPARQLTemplateBasedLearner2(Model model, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet, Options options){ + this(model, resourcesIndex, classesIndex, propertiesIndex, posTagger, wordNet, options, new ExtractionDBCache("cache")); + } + + public SPARQLTemplateBasedLearner2(Model model, MappingBasedIndex mappingBasedIndex, PartOfSpeechTagger posTagger) + { + this(model, new SPARQLIndex(model),new SPARQLIndex(model),new SPARQLIndex(model),posTagger); + setMappingIndex(mappingBasedIndex); + } + + public SPARQLTemplateBasedLearner2(Model model, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet, Options options, ExtractionDBCache cache){ + this.model = model; + this.resourcesIndex = resourcesIndex; + this.classesIndex = classesIndex; + this.propertiesIndex = propertiesIndex; + this.posTagger = posTagger; + this.wordNet = wordNet; + this.cache = cache; + + setOptions(options); + + if(propertiesIndex instanceof SPARQLPropertiesIndex){ + if(propertiesIndex instanceof VirtuosoPropertiesIndex){ + datatypePropertiesIndex = new VirtuosoDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + objectPropertiesIndex = new VirtuosoObjectPropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + } else { + datatypePropertiesIndex = new SPARQLDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + objectPropertiesIndex = new 
SPARQLObjectPropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + } + } else { + datatypePropertiesIndex = propertiesIndex; + objectPropertiesIndex = propertiesIndex; + } + reasoner = new SPARQLReasoner(new LocalModelBasedSparqlEndpointKS(ModelFactory.createOntologyModel(OntModelSpec.RDFS_MEM, model)), cache); + } + + public void setGrammarFiles(String[] grammarFiles) + { + if(templateGenerator==null) {throw new AssertionError("Learner not initialized. Please call init();");} + templateGenerator.setGrammarFiles(grammarFiles); + } + + @Override + public void init() throws ComponentInitException { + templateGenerator = new Templator(posTagger, wordNet, grammarFiles); + lemmatizer = new LingPipeLemmatizer(); + } + + public void setMappingIndex(MappingBasedIndex mappingIndex) { + this.mappingIndex = mappingIndex; + } + + public void setCache(ExtractionDBCache cache) { + this.cache = cache; + } + + public void setKnowledgebase(Knowledgebase knowledgebase){ + this.endpoint = knowledgebase.getEndpoint(); + this.resourcesIndex = knowledgebase.getResourceIndex(); + this.classesIndex = knowledgebase.getClassIndex(); + this.propertiesIndex = knowledgebase.getPropertyIndex(); + this.mappingIndex = knowledgebase.getMappingIndex(); + if(propertiesIndex instanceof SPARQLPropertiesIndex){ + if(propertiesIndex instanceof VirtuosoPropertiesIndex){ + datatypePropertiesIndex = new VirtuosoDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + objectPropertiesIndex = new VirtuosoObjectPropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + } else { + datatypePropertiesIndex = new SPARQLDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + objectPropertiesIndex = new SPARQLObjectPropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + } + } else { + datatypePropertiesIndex = propertiesIndex; + objectPropertiesIndex = propertiesIndex; + } + reasoner = new SPARQLReasoner(new SparqlEndpointKS(endpoint)); + } + + public void setUseDomainRangeRestriction(boolean useDomainRangeRestriction) { + this.useDomainRangeRestriction = useDomainRangeRestriction; + } + + /* + * Only for Evaluation useful. + */ + public void setUseIdealTagger(boolean value){ + templateGenerator.setUNTAGGED_INPUT(!value); + } + + private void setOptions(Options options){ + maxIndexResults = Integer.parseInt(options.get("solr.query.limit", "10")); + + maxQueryExecutionTimeInSeconds = Integer.parseInt(options.get("sparql.query.maxExecutionTimeInSeconds", "20")); + cache.setMaxExecutionTimeInSeconds(maxQueryExecutionTimeInSeconds); + + useRemoteEndpointValidation = options.get("learning.validationType", "remote").equals("remote") ? 
true : false; + stopIfQueryResultNotEmpty = Boolean.parseBoolean(options.get("learning.stopAfterFirstNonEmptyQueryResult", "true")); + maxTestedQueriesPerTemplate = Integer.parseInt(options.get("learning.maxTestedQueriesPerTemplate", "20")); + + String wordnetPath = options.get("wordnet.dictionary", "tbsl/dict"); + wordnetPath = this.getClass().getClassLoader().getResource(wordnetPath).getPath(); + System.setProperty("wordnet.database.dir", wordnetPath); + } + + public void setEndpoint(SparqlEndpoint endpoint){ + this.endpoint = endpoint; + + reasoner = new SPARQLReasoner(new SparqlEndpointKS(endpoint)); + reasoner.setCache(cache); + reasoner.prepareSubsumptionHierarchy(); + } + + public void setQuestion(String question){ + this.question = question; + } + + public void setUseRemoteEndpointValidation(boolean useRemoteEndpointValidation){ + this.useRemoteEndpointValidation = useRemoteEndpointValidation; + } + + public int getMaxQueryExecutionTimeInSeconds() { + return maxQueryExecutionTimeInSeconds; + } + + public void setMaxQueryExecutionTimeInSeconds(int maxQueryExecutionTimeInSeconds) { + this.maxQueryExecutionTimeInSeconds = maxQueryExecutionTimeInSeconds; + } + + public int getMaxTestedQueriesPerTemplate() { + return maxTestedQueriesPerTemplate; + } + + public void setMaxTestedQueriesPerTemplate(int maxTestedQueriesPerTemplate) { + this.maxTestedQueriesPerTemplate = maxTestedQueriesPerTemplate; + } + + private void reset(){ + learnedSPARQLQueries = new TreeSet<WeightedQuery>(); + template2Queries = new HashMap<Template, Collection<? extends Query>>(); + slot2URI = new HashMap<Slot, List<String>>(); + relevantKeywords = new HashSet<String>(); + currentlyExecutedQuery = null; + + // templateMon.reset(); + // sparqlMon.reset(); + } + + public void learnSPARQLQueries() throws NoTemplateFoundException{ + reset(); + //generate SPARQL query templates + logger.debug("Generating SPARQL query templates..."); + templateMon.start(); + if(multiThreaded){ + templates = templateGenerator.buildTemplatesMultiThreaded(question,CREATE_SYNONYMS); + } else { + templates = templateGenerator.buildTemplates(question); + } + templateMon.stop(); + logger.debug("Done in " + templateMon.getLastValue() + "ms."); + relevantKeywords.addAll(templateGenerator.getUnknownWords()); + if(templates.isEmpty()){ + throw new NoTemplateFoundException(); + + } + logger.debug("Templates:"); + for(Template t : templates){ + logger.debug(t); + } + + //get the weighted query candidates + generatedQueries = getWeightedSPARQLQueries(templates); + sparqlQueryCandidates = new ArrayList<WeightedQuery>(); + int i = 0; + for(WeightedQuery wQ : generatedQueries){ + logger.debug(wQ.explain()); + sparqlQueryCandidates.add(wQ); + if(i == maxTestedQueries){ + break; + } + i++; + } + + if(mode == Mode.BEST_QUERY){ + double bestScore = -1; + for(WeightedQuery candidate : generatedQueries){ + double score = candidate.getScore(); + if(score >= bestScore){ + bestScore = score; + learnedSPARQLQueries.add(candidate); + } else { + break; + } + } + } else if(mode == Mode.BEST_NON_EMPTY_QUERY){ + //test candidates + if(useRemoteEndpointValidation){ //on remote endpoint + validateAgainstRemoteEndpoint(sparqlQueryCandidates); + } else {//on local model + + } + } + } + + public SortedSet<WeightedQuery> getGeneratedQueries() { + return generatedQueries; + } + + public SortedSet<WeightedQuery> getGeneratedQueries(int topN) { + SortedSet<WeightedQuery> topNQueries = new TreeSet<WeightedQuery>(); + int max = Math.min(topN, generatedQueries.size()); + 
for(WeightedQuery wQ : generatedQueries){ + topNQueries.add(wQ); + if(topNQueries.size() == max){ + break; + } + } + return topNQueries; + } + + public Set<Template> getTemplates(){ + return templates; + } + + public List<String> getGeneratedSPARQLQueries(){ + List<String> queries = new ArrayList<String>(); + for(WeightedQuery wQ : sparqlQueryCandidates){ + queries.add(wQ.getQuery().toString()); + } + + return queries; + } + + public Map<Template, Collection<? extends Query>> getTemplates2SPARQLQueries(){ + return template2Queries; + } + + public Map<Slot, List<String>> getSlot2URIs(){ + return slot2URI; + } + + private void normProminenceValues(Set<Allocation> allocations){ + double min = 0; + double max = 0; + for(Allocation a : allocations){ + if(a.getProminence() < min){ + min = a.getProminence(); + } + if(a.getProminence() > max){ + max = a.getProminence(); + } + } + if(min==max) {return;} + for(Allocation a : allocations){ + double prominence = a.getProminence()/(max-min); + a.setProminence(prominence); + } + } + + private void computeScore(Set<Allocation> allocations){ + double alpha = 0.8; + double beta = 1 - alpha; + + for(Allocation a : allocations){ + double score = alpha * a.getSimilarity() + beta * a.getProminence(); + a.setScore(score); + } + + } + + public Set<String> getRelevantKeywords(){ + return relevantKeywords; + } + + // just for testing the HMM integration, getWeightedSPARQLQueriesOld is the original one + private SortedSet<WeightedQuery> getWeightedSPARQLQueries(Set<Template> templates) + { + // for testing + for(Template template: templates) + { + { + ArrayList<String> keywords = new ArrayList<String>(); + for(Slot slot: template.getSlots()) + { + keywords.add(slot.getWords().get(0)); + } + if(template.getSlots().size()!=3) {continue;} +// if(!keywords.contains("Mean Hamster Software")) {continue;} +// if(!keywords.contains("published")) {continue;} + System.out.println("\"keywords\": "+keywords); + } + System.out.println(template); + SortedSet<WeightedQuery> queries = new TreeSet<WeightedQuery>(); + Query query = template.getQuery(); + double score = 0; + + Map<List<String>,List<ResourceInfo>> segmentToURIs = new HashMap<List<String>,List<ResourceInfo>>(); + Map<String,IndexResultItem> uriUniqueToResultItem = new HashMap<String,IndexResultItem>(); + for(Slot slot: template.getSlots()) + { + List<String> segment = new LinkedList<String>(); + segment.addAll(Arrays.asList(slot.getWords().get(0).split("\\s"))); + List<ResourceInfo> resourceInfos = new LinkedList<ResourceInfo>(); + + for(IndexResultItem item : getIndexResultItems(slot)) + { + // if this gets used at another place, create a function IndexResultItemToResourceInfo() + ResourceInfo info = new ResourceInfo(); + info.setUri(item.getUri()); + String label = item.getLabel(); + // in dbpedia, the last part of the uri is transformed from the english label, reverse the transformation (should almost always work for dbpedia article resources) + info.setLabel(label!=null?label:sfp.getShortForm(IRI.create(item.getUri()))); + // in saedeehs algorithm, the emission probabilty is formed by the string similarity + // but we use the lucene index score + double max = 0; + for(String word: slot.getWords()) {max = Math.max(max, Similarity.getSimilarity(word, info.getLabel()));} + if(max<0||max>1) throw new AssertionEr... [truncated message content] |
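The HMM integration above boils down to a simple emission model: for each slot, every candidate resource is scored with the best string similarity between any of the slot's words and the candidate's label. Below is a minimal self-contained sketch of that computation, mirroring the loop in getWeightedSPARQLQueries; the helper name emissionScore is hypothetical, and Similarity is the project's own utility, assumed to return values in [0,1] as the AssertionError check in the diff implies:

import java.util.Arrays;
import java.util.List;
import org.dllearner.algorithm.tbsl.util.Similarity;

public class EmissionScoreSketch {

    // Best similarity between any slot word and the candidate label; this is the
    // value the learner stores via info.setStringSimilarityScore(max).
    static double emissionScore(List<String> slotWords, String candidateLabel) {
        double max = 0;
        for (String word : slotWords) {
            max = Math.max(max, Similarity.getSimilarity(word, candidateLabel));
        }
        if (max < 0 || max > 1) throw new AssertionError("max is not in [0,1], max=" + max);
        return max;
    }

    public static void main(String[] args) {
        // Hypothetical example: one slot word against a candidate label.
        System.out.println(emissionScore(Arrays.asList("published"), "publisher"));
    }
}

Note that low-similarity candidates still enter the model; BOA-sourced items are pruned earlier, in getIndexResultItems, which only keeps them when their boa-score field is absent or exceeds BOA_THRESHOLD (0.9).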
From: <ki...@us...> - 2012-09-27 09:40:01
Revision: 3850 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3850&view=rev Author: kirdie Date: 2012-09-27 09:39:55 +0000 (Thu, 27 Sep 2012) Log Message: ----------- upgraded jena-arq to 2.9.3 and jena-core to 2.7.3 to hopefully fix a NoSuchMethodError in QueryEngineHTTP.setTimeout(long). Added the Apache release repository "https://repository.apache.org/content/repositories/releases/". Modified Paths: -------------- branches/hmm/pom.xml Modified: branches/hmm/pom.xml =================================================================== --- branches/hmm/pom.xml 2012-09-26 14:44:42 UTC (rev 3849) +++ branches/hmm/pom.xml 2012-09-27 09:39:55 UTC (rev 3850) @@ -180,7 +180,7 @@ <dependency> <groupId>org.apache.jena</groupId> <artifactId>jena-core</artifactId> - <version>2.7.2</version> + <version>2.7.3</version> </dependency> <!--SwingX is in central --> <dependency> @@ -240,7 +240,7 @@ <dependency> <groupId>org.apache.jena</groupId> <artifactId>jena-arq</artifactId> - <version>2.9.2</version> + <version>2.9.3</version> </dependency> <!--Junits --> @@ -506,8 +506,12 @@ <url>http://elk-reasoner.googlecode.com/svn/m2/releases</url> <releases/> </repository> + <repository> + <id>apache-repo-releases</id> + <url>https://repository.apache.org/content/repositories/releases/</url> + <releases><enabled>true</enabled></releases> + </repository> </repositories> - <distributionManagement> <repository> <id>archiva.internal</id>
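The upgrade matters at a single call site. Below is a hedged smoke test, not part of the commit, that exercises QueryEngineHTTP.setTimeout(long), the method whose absence at runtime raised the NoSuchMethodError; the endpoint URL and query are placeholders, and the timeout argument is assumed to be in milliseconds:

import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP;

public class TimeoutSmokeTest {
    public static void main(String[] args) {
        QueryEngineHTTP qe = new QueryEngineHTTP("http://dbpedia.org/sparql",
                "SELECT * WHERE { ?s ?p ?o } LIMIT 1");
        // Throws NoSuchMethodError if an older jena-arq still wins on the classpath.
        qe.setTimeout(20000);
        ResultSet rs = qe.execSelect();
        System.out.println(rs.hasNext());
    }
}

Whether the new versions actually end up on the classpath can be checked with mvn dependency:tree -Dincludes=org.apache.jena.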
From: <ki...@us...> - 2012-09-26 14:44:53
Revision: 3849 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3849&view=rev Author: kirdie Date: 2012-09-26 14:44:42 +0000 (Wed, 26 Sep 2012) Log Message: ----------- test case now successfully disambiguates the Mean Hamster Software example from the hmm paper. Modified Paths: -------------- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java Modified: branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-09-21 14:12:34 UTC (rev 3848) +++ branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-09-26 14:44:42 UTC (rev 3849) @@ -71,17 +71,21 @@ import com.jamonapi.Monitor; import com.jamonapi.MonitorFactory; -public class SPARQLTemplateBasedLearner2 implements SparqlQueryLearningAlgorithm{ - - enum Mode{ - BEST_QUERY, BEST_NON_EMPTY_QUERY - } - +/** The old learner taken over by Konrad Höffner for experiments with the Hidden Markov Algorithm by Saedeeh Shekarpur. + * + * */ +public class SPARQLTemplateBasedLearner2 implements SparqlQueryLearningAlgorithm +{ + enum Mode {BEST_QUERY, BEST_NON_EMPTY_QUERY} private Mode mode = Mode.BEST_QUERY; + + /** used to create a label out of the URI when there is no label available in the SPARQL endpoint.*/ private static SimpleIRIShortFormProvider sfp = new SimpleIRIShortFormProvider(); private static final Logger logger = Logger.getLogger(SPARQLTemplateBasedLearner2.class); + /** synonyms are great but are not used yet by the HMM algorithm. **/ private static final boolean CREATE_SYNONYMS = false; + /** The minimum score of items that are accepted from the Sindice search BOA index. 
**/ private static final Double BOA_THRESHOLD = 0.9; private Monitor templateMon = MonitorFactory.getTimeMonitor("template"); private Monitor sparqlMon = MonitorFactory.getTimeMonitor("sparql"); @@ -511,8 +515,8 @@ keywords.add(slot.getWords().get(0)); } if(template.getSlots().size()!=3) {continue;} - if(!keywords.contains("Mean Hamster Software")) {continue;} - if(!keywords.contains("published")) {continue;} +// if(!keywords.contains("Mean Hamster Software")) {continue;} +// if(!keywords.contains("published")) {continue;} System.out.println("\"keywords\": "+keywords); } System.out.println(template); @@ -542,7 +546,8 @@ for(String word: slot.getWords()) {max = Math.max(max, Similarity.getSimilarity(word, info.getLabel()));} if(max<0||max>1) throw new AssertionError("max is not in [0,1], max="+max); info.setStringSimilarityScore(max); - + if(!info.setTypeFromDBpediaURI()) throw new AssertionError("could not set type for info "+info); + System.err.println("info with type: "+info); resourceInfos.add(info); } segmentToURIs.put(segment,resourceInfos); Modified: branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java =================================================================== --- branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-09-21 14:12:34 UTC (rev 3848) +++ branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-09-26 14:44:42 UTC (rev 3849) @@ -105,7 +105,7 @@ private static final boolean DBPEDIA_PRETAGGED = true; private static final boolean OXFORD_PRETAGGED = false; - /*@Test*/ public void testDBpedia() throws Exception + @Test public void testDBpedia() throws Exception { File file = generateTestDataIfNecessary( new File(getClass().getClassLoader().getResource("tbsl/evaluation/qald2-dbpedia-train-tagged(ideal).xml").getFile()), @@ -162,7 +162,7 @@ /** For debugging one question in particular. */ - @Test public void testSingleQueryDBpedia() + /*@Test*/ public void testSingleQueryDBpedia() { // Logger.getLogger(Templator.class).setLevel(Level.DEBUG); // Logger.getLogger(Parser.class).setLevel(Level.DEBUG); @@ -681,7 +681,7 @@ return testData; } - /** + /** Updates question file by removing questions without nonempty resource list answer and adding answers. * @param file * @param updatedFile * @throws ParserConfigurationException
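One detail from this revision worth spelling out is the new sfp field: when the index carries no label for a resource, the learner now derives one from the URI itself, reversing DBpedia's label-to-URI transformation (which should work for almost all article resources). A small illustration, ours rather than the commit's, using the same OWL API classes as the diff:

import org.semanticweb.owlapi.model.IRI;
import org.semanticweb.owlapi.util.SimpleIRIShortFormProvider;

public class LabelFallbackSketch {
    public static void main(String[] args) {
        SimpleIRIShortFormProvider sfp = new SimpleIRIShortFormProvider();
        // No fragment in the IRI, so the short form is the last path segment:
        // "Mean_Hamster_Software", the running example from this commit's log.
        String uri = "http://dbpedia.org/resource/Mean_Hamster_Software";
        System.out.println(sfp.getShortForm(IRI.create(uri)));
    }
}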
From: <ki...@us...> - 2012-09-21 14:12:42
Revision: 3848 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3848&view=rev Author: kirdie Date: 2012-09-21 14:12:34 +0000 (Fri, 21 Sep 2012) Log Message: ----------- more work on the hmm. Modified Paths: -------------- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java branches/hmm/components-ext/src/main/java/org/dllearner/common/index/HierarchicalIndex.java branches/hmm/components-ext/src/main/java/org/dllearner/common/index/Index.java branches/hmm/components-ext/src/main/java/org/dllearner/common/index/IndexResultItem.java branches/hmm/components-ext/src/main/java/org/dllearner/common/index/SOLRIndex.java branches/hmm/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java Modified: branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-09-20 15:44:22 UTC (rev 3847) +++ branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-09-21 14:12:34 UTC (rev 3848) @@ -5,6 +5,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedList; @@ -34,6 +35,7 @@ import org.dllearner.algorithm.tbsl.util.Knowledgebase; import org.dllearner.algorithm.tbsl.util.PopularityMap; import org.dllearner.algorithm.tbsl.util.PopularityMap.EntityType; +import org.dllearner.algorithm.tbsl.util.Similarity; import org.dllearner.common.index.Index; import org.dllearner.common.index.IndexResultItem; import org.dllearner.common.index.IndexResultSet; @@ -55,6 +57,8 @@ import org.dllearner.kb.sparql.SparqlQuery; import org.dllearner.reasoning.SPARQLReasoner; import org.ini4j.Options; +import org.semanticweb.owlapi.model.IRI; +import org.semanticweb.owlapi.util.SimpleIRIShortFormProvider; import com.hp.hpl.jena.ontology.OntModelSpec; import com.hp.hpl.jena.query.QueryExecutionFactory; import com.hp.hpl.jena.query.QueryFactory; @@ -74,8 +78,11 @@ } private Mode mode = Mode.BEST_QUERY; + private static SimpleIRIShortFormProvider sfp = new SimpleIRIShortFormProvider(); private static final Logger logger = Logger.getLogger(SPARQLTemplateBasedLearner2.class); + private static final boolean CREATE_SYNONYMS = false; + private static final Double BOA_THRESHOLD = 0.9; private Monitor templateMon = MonitorFactory.getTimeMonitor("template"); private Monitor sparqlMon = MonitorFactory.getTimeMonitor("sparql"); @@ -371,7 +378,7 @@ logger.debug("Generating SPARQL query templates..."); templateMon.start(); if(multiThreaded){ - templates = templateGenerator.buildTemplatesMultiThreaded(question); + templates = templateGenerator.buildTemplatesMultiThreaded(question,CREATE_SYNONYMS); } else { templates = templateGenerator.buildTemplates(question); } @@ -512,8 +519,9 @@ SortedSet<WeightedQuery> queries = new TreeSet<WeightedQuery>(); Query query = template.getQuery(); double score = 0; - + Map<List<String>,List<ResourceInfo>> segmentToURIs = new HashMap<List<String>,List<ResourceInfo>>(); + 
Map<String,IndexResultItem> uriUniqueToResultItem = new HashMap<String,IndexResultItem>(); for(Slot slot: template.getSlots()) { List<String> segment = new LinkedList<String>(); @@ -525,9 +533,19 @@ // if this gets used at another place, create a function IndexResultItemToResourceInfo() ResourceInfo info = new ResourceInfo(); info.setUri(item.getUri()); - info.setLabel(item.getLabel()); + String label = item.getLabel(); + // in dbpedia, the last part of the uri is transformed from the english label, reverse the transformation (should almost always work for dbpedia article resources) + info.setLabel(label!=null?label:sfp.getShortForm(IRI.create(item.getUri()))); + // in saedeehs algorithm, the emission probabilty is formed by the string similarity + // but we use the lucene index score + double max = 0; + for(String word: slot.getWords()) {max = Math.max(max, Similarity.getSimilarity(word, info.getLabel()));} + if(max<0||max>1) throw new AssertionError("max is not in [0,1], max="+max); + info.setStringSimilarityScore(max); + + resourceInfos.add(info); } - segmentToURIs.put(segment,resources); + segmentToURIs.put(segment,resourceInfos); } HiddenMarkovModel hmm = new HiddenMarkovModel(); hmm.initialization(); @@ -851,11 +869,11 @@ private Set<IndexResultItem> getIndexResultItems(Slot slot) { -// List<String> uris = new LinkedList<String>(); + // List<String> uris = new LinkedList<String>(); Set<IndexResultItem> indexResultItems = new HashSet<IndexResultItem>(); - + Index index = getIndexBySlotType(slot); - + for(String word : slot.getWords()) { IndexResultSet rs = new IndexResultSet(); @@ -876,18 +894,23 @@ //use the non manual indexes only if mapping based resultset is not empty and option is set if(!useManualMappingsIfExistOnly || rs.isEmpty()){ if(slot.getSlotType() == SlotType.RESOURCE){ - rs.add(index.getResourcesWithScores(word, 20)); + rs.add(index.getResourcesWithScores(word, 20,0)); } else { if(slot.getSlotType() == SlotType.CLASS){ word = PlingStemmer.stem(word); } - rs.add(index.getResourcesWithScores(word, 20)); + IndexResultSet tmp = index.getResourcesWithScores(word, 20,0,Collections.singleton("boa-score")); + for(IndexResultItem item : tmp.getItems()) + {System.out.println(item); + Double boaScore = (Double) item.getFields().get("boa-score"); + if(boaScore==null||boaScore>BOA_THRESHOLD) rs.addItem(item); + } } } -// for(IndexResultItem item: rs.getItems()) -// { -// uris.add(item.getUri()); -// } + // for(IndexResultItem item: rs.getItems()) + // { + // uris.add(item.getUri()); + // } indexResultItems.addAll(rs.getItems()); } return indexResultItems; Modified: branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java =================================================================== --- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2012-09-20 15:44:22 UTC (rev 3847) +++ branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2012-09-21 14:12:34 UTC (rev 3848) @@ -31,111 +31,111 @@ import org.dllearner.algorithm.tbsl.sparql.Template; public class Templator { - + private static final Logger logger = Logger.getLogger(Templator.class); - - String[] GRAMMAR_FILES = {"tbsl/lexicon/english.lex","tbsl/lexicon/english_oxford.lex"}; - + + String[] GRAMMAR_FILES = {"tbsl/lexicon/english.lex","tbsl/lexicon/english_oxford.lex"}; + private String[] noun = {"NN","NNS","NNP","NNPS","NPREP","JJNN","JJNPREP"}; private String[] adjective = {"JJ","JJR","JJS","JJH"}; 
private String[] verb = {"VB","VBD","VBG","VBN","VBP","VBZ","PASSIVE","PASSPART","VPASS","VPASSIN","GERUNDIN","VPREP","WHEN","WHERE"}; - + PartOfSpeechTagger tagger; LTAGLexicon g; LTAG_Lexicon_Constructor LTAG_Constructor = new LTAG_Lexicon_Constructor(); Parser parser; Preprocessor pp; - + WordNet wordnet; LingPipeLemmatizer lem = new LingPipeLemmatizer(); - - DUDE2UDRS_Converter d2u = new DUDE2UDRS_Converter(); - DRS2SPARQL_Converter d2s = new DRS2SPARQL_Converter(); - + + DUDE2UDRS_Converter d2u = new DUDE2UDRS_Converter(); + DRS2SPARQL_Converter d2s = new DRS2SPARQL_Converter(); + boolean ONE_SCOPE_ONLY = true; boolean UNTAGGED_INPUT = true; boolean USE_NER = false; boolean USE_WORDNET = true; boolean VERBOSE = true; - + private String taggedInput; - + private Set<Template> templates; private Set<DRS> drses; - + public Templator() { this(new StanfordPartOfSpeechTagger(), new WordNet()); } - + public Templator(final PartOfSpeechTagger tagger) { this(tagger, new WordNet()); } - + public Templator(final PartOfSpeechTagger tagger, WordNet wordnet) { - this.tagger = tagger; - this.wordnet = wordnet; - - List<InputStream> grammarFiles = new ArrayList<InputStream>(); - for(int i = 0; i < GRAMMAR_FILES.length; i++){ - grammarFiles.add(this.getClass().getClassLoader().getResourceAsStream(GRAMMAR_FILES[i])); - } - - g = LTAG_Constructor.construct(grammarFiles); - - parser = new Parser(); - parser.SHOW_GRAMMAR = true; - parser.USE_DPS_AS_INITTREES = true; - parser.CONSTRUCT_SEMANTICS = true; - parser.MODE = "LEIPZIG"; - - pp = new Preprocessor(USE_NER); + this.tagger = tagger; + this.wordnet = wordnet; + + List<InputStream> grammarFiles = new ArrayList<InputStream>(); + for(int i = 0; i < GRAMMAR_FILES.length; i++){ + grammarFiles.add(this.getClass().getClassLoader().getResourceAsStream(GRAMMAR_FILES[i])); + } + + g = LTAG_Constructor.construct(grammarFiles); + + parser = new Parser(); + parser.SHOW_GRAMMAR = true; + parser.USE_DPS_AS_INITTREES = true; + parser.CONSTRUCT_SEMANTICS = true; + parser.MODE = "LEIPZIG"; + + pp = new Preprocessor(USE_NER); } - + public Templator(final PartOfSpeechTagger tagger, WordNet wordnet, String[] GRAMMAR_FILES) { - this.tagger = tagger; - this.wordnet = wordnet; - this.GRAMMAR_FILES = GRAMMAR_FILES; + this.tagger = tagger; + this.wordnet = wordnet; + this.GRAMMAR_FILES = GRAMMAR_FILES; - List<InputStream> grammarFiles = new ArrayList<InputStream>(); - for(int i = 0; i < GRAMMAR_FILES.length; i++) { - grammarFiles.add(this.getClass().getClassLoader().getResourceAsStream(GRAMMAR_FILES[i])); - } + List<InputStream> grammarFiles = new ArrayList<InputStream>(); + for(int i = 0; i < GRAMMAR_FILES.length; i++) { + grammarFiles.add(this.getClass().getClassLoader().getResourceAsStream(GRAMMAR_FILES[i])); + } - g = LTAG_Constructor.construct(grammarFiles); + g = LTAG_Constructor.construct(grammarFiles); - parser = new Parser(); - parser.SHOW_GRAMMAR = true; - parser.USE_DPS_AS_INITTREES = true; - parser.CONSTRUCT_SEMANTICS = true; - parser.MODE = "LEIPZIG"; + parser = new Parser(); + parser.SHOW_GRAMMAR = true; + parser.USE_DPS_AS_INITTREES = true; + parser.CONSTRUCT_SEMANTICS = true; + parser.MODE = "LEIPZIG"; - pp = new Preprocessor(USE_NER); -} - + pp = new Preprocessor(USE_NER); + } + public Templator(boolean b) { - this.tagger = new StanfordPartOfSpeechTagger(); - this.USE_WORDNET = false; - VERBOSE = b; - - List<InputStream> grammarFiles = new ArrayList<InputStream>(); - for(int i = 0; i < GRAMMAR_FILES.length; i++){ - 
grammarFiles.add(this.getClass().getClassLoader().getResourceAsStream(GRAMMAR_FILES[i])); - } - - g = LTAG_Constructor.construct(grammarFiles); - - parser = new Parser(); - parser.SHOW_GRAMMAR = false; - parser.VERBOSE = b; - parser.USE_DPS_AS_INITTREES = true; - parser.CONSTRUCT_SEMANTICS = true; - parser.MODE = "LEIPZIG"; - - pp = new Preprocessor(USE_NER); - pp.setVERBOSE(b); + this.tagger = new StanfordPartOfSpeechTagger(); + this.USE_WORDNET = false; + VERBOSE = b; + + List<InputStream> grammarFiles = new ArrayList<InputStream>(); + for(int i = 0; i < GRAMMAR_FILES.length; i++){ + grammarFiles.add(this.getClass().getClassLoader().getResourceAsStream(GRAMMAR_FILES[i])); + } + + g = LTAG_Constructor.construct(grammarFiles); + + parser = new Parser(); + parser.SHOW_GRAMMAR = false; + parser.VERBOSE = b; + parser.USE_DPS_AS_INITTREES = true; + parser.CONSTRUCT_SEMANTICS = true; + parser.MODE = "LEIPZIG"; + + pp = new Preprocessor(USE_NER); + pp.setVERBOSE(b); } - + public void setUNTAGGED_INPUT(boolean b) { UNTAGGED_INPUT = b; } @@ -146,20 +146,21 @@ VERBOSE = b; } public void setGrammarFiles(String[] gf) { - GRAMMAR_FILES = gf; - List<InputStream> grammarFiles = new ArrayList<InputStream>(); - for(int i = 0; i < GRAMMAR_FILES.length; i++){ - grammarFiles.add(this.getClass().getClassLoader().getResourceAsStream(GRAMMAR_FILES[i])); - } - g = LTAG_Constructor.construct(grammarFiles); + GRAMMAR_FILES = gf; + List<InputStream> grammarFiles = new ArrayList<InputStream>(); + for(int i = 0; i < GRAMMAR_FILES.length; i++){ + grammarFiles.add(this.getClass().getClassLoader().getResourceAsStream(GRAMMAR_FILES[i])); + } + g = LTAG_Constructor.construct(grammarFiles); } - public Set<Template> buildTemplates(String s) { - - d2s.setInputString(s); - + public Set<Template> buildTemplates(String s) {return buildTemplates(s,true);} + public Set<Template> buildTemplates(String s, boolean createSynonyms) { + + d2s.setInputString(s); + boolean clearAgain = true; - + String tagged; if (UNTAGGED_INPUT) { s = pp.normalize(s); @@ -176,135 +177,140 @@ newtagged = pp.condenseNominals(pp.findNEs(tagged,s)); } else newtagged = pp.condenseNominals(tagged); - + newtagged = pp.condense(newtagged); logger.debug("Preprocessed: " + newtagged); - - parser.parse(newtagged,g); - - if (parser.getDerivationTrees().isEmpty()) { - parser.clear(g,parser.getTemps()); - clearAgain = false; - if (VERBOSE) logger.error("[Templator.java] '" + s + "' could not be parsed."); - } - else { - try { - parser.buildDerivedTrees(g); - } catch (ParseException e) { - if (VERBOSE) logger.error("[Templator.java] ParseException at '" + e.getMessage() + "'", e); - } - } - // build pairs <String,POStag> from tagged - Hashtable<String,String> postable = new Hashtable<String,String>(); - for (String st : newtagged.split(" ")) { + parser.parse(newtagged,g); + + if (parser.getDerivationTrees().isEmpty()) { + parser.clear(g,parser.getTemps()); + clearAgain = false; + if (VERBOSE) logger.error("[Templator.java] '" + s + "' could not be parsed."); + } + else { + try { + parser.buildDerivedTrees(g); + } catch (ParseException e) { + if (VERBOSE) logger.error("[Templator.java] ParseException at '" + e.getMessage() + "'", e); + } + } + + // build pairs <String,POStag> from tagged + Hashtable<String,String> postable = new Hashtable<String,String>(); + for (String st : newtagged.split(" ")) { postable.put(st.substring(0,st.indexOf("/")).toLowerCase(),st.substring(st.indexOf("/")+1));; } - // - - Set<DRS> drses = new HashSet<DRS>(); - Set<Template> templates = new 
HashSet<Template>(); - - for (Dude dude : parser.getDudes()) { - UDRS udrs = d2u.convert(dude); - if (udrs != null) { - - for (DRS drs : udrs.initResolve()) { - - List<Slot> slots = new ArrayList<Slot>(); - slots.addAll(dude.getSlots()); - d2s.setSlots(slots); - d2s.redundantEqualRenaming(drs); - - if (!containsModuloRenaming(drses,drs)) { -// // DEBUG - if (VERBOSE) { - logger.debug(">>> DUDE:\n" + dude.toString()); - logger.debug("\n>>> DRS:\n"+ drs.toString()); - for (Slot sl : slots) { - logger.debug(sl.toString()); - } - } -// // - drses.add(drs); - - try { - Template temp = d2s.convert(drs,slots); - if (temp == null) { continue; } - temp = temp.checkandrefine(); - if (temp == null) { continue; } - - if (USE_WORDNET) { // find WordNet synonyms - List<String> newwords; - String word; - String pos; - for (Slot slot : temp.getSlots()) { - if (!slot.getWords().isEmpty()) { - - word = slot.getWords().get(0); - pos = postable.get(word.toLowerCase().replace(" ","_")); - - POS wordnetpos = null; - if (pos != null) { - if (equalsOneOf(pos,noun)) { - wordnetpos = POS.NOUN; - } - else if (equalsOneOf(pos,adjective)) { - wordnetpos = POS.ADJECTIVE; - } - else if (equalsOneOf(pos,verb)) { - wordnetpos = POS.VERB; - } - } - - List<String> strings = new ArrayList<String>(); - if (wordnetpos != null && wordnetpos.equals(POS.ADJECTIVE)) { - strings = wordnet.getAttributes(word); - } - - newwords = new ArrayList<String>(); - newwords.addAll(slot.getWords()); - newwords.addAll(strings); - - if (wordnetpos != null && !slot.getSlotType().equals(SlotType.RESOURCE)) { - newwords.addAll(wordnet.getBestSynonyms(wordnetpos,getLemmatizedWord(word))); - for (String att : getLemmatizedWords(strings)) { - newwords.addAll(wordnet.getBestSynonyms(wordnetpos,att)); - } - } - if (newwords.isEmpty()) { - newwords.add(slot.getWords().get(0)); - } - List<String> newwordslist = new ArrayList<String>(); - newwordslist.addAll(newwords); - slot.setWords(newwordslist); - } - } - } - // - - templates.add(temp); - } catch (java.lang.ClassCastException e) { - continue; - } - if (ONE_SCOPE_ONLY) { break; } - } - } - } - } - - if (clearAgain) { - parser.clear(g,parser.getTemps()); - } -// System.gc(); - - return templates; - } - - public Set<Template> buildTemplatesMultiThreaded(String s) { - + // + + Set<DRS> drses = new HashSet<DRS>(); + Set<Template> templates = new HashSet<Template>(); + + for (Dude dude : parser.getDudes()) { + UDRS udrs = d2u.convert(dude); + if (udrs != null) { + + for (DRS drs : udrs.initResolve()) { + + List<Slot> slots = new ArrayList<Slot>(); + slots.addAll(dude.getSlots()); + d2s.setSlots(slots); + d2s.redundantEqualRenaming(drs); + + if (!containsModuloRenaming(drses,drs)) { + // // DEBUG + if (VERBOSE) { + logger.debug(">>> DUDE:\n" + dude.toString()); + logger.debug("\n>>> DRS:\n"+ drs.toString()); + for (Slot sl : slots) { + logger.debug(sl.toString()); + } + } + // // + drses.add(drs); + + try { + Template temp = d2s.convert(drs,slots); + if (temp == null) { continue; } + temp = temp.checkandrefine(); + if (temp == null) { continue; } + + if (USE_WORDNET) { // find WordNet synonyms + List<String> newwords; + String word; + String pos; + for (Slot slot : temp.getSlots()) { + if (!slot.getWords().isEmpty()) { + + word = slot.getWords().get(0); + pos = postable.get(word.toLowerCase().replace(" ","_")); + + POS wordnetpos = null; + if (pos != null) { + if (equalsOneOf(pos,noun)) { + wordnetpos = POS.NOUN; + } + else if (equalsOneOf(pos,adjective)) { + wordnetpos = POS.ADJECTIVE; + } + else if 
(equalsOneOf(pos,verb)) { + wordnetpos = POS.VERB; + } + } + + List<String> strings = new ArrayList<String>(); + if(createSynonyms) + { + if (wordnetpos != null && wordnetpos.equals(POS.ADJECTIVE)) {strings = wordnet.getAttributes(word);} + } + + newwords = new ArrayList<String>(); + newwords.addAll(slot.getWords()); + newwords.addAll(strings); + + if(createSynonyms) + { + if (wordnetpos != null && !slot.getSlotType().equals(SlotType.RESOURCE)) { + newwords.addAll(wordnet.getBestSynonyms(wordnetpos,getLemmatizedWord(word))); + for (String att : getLemmatizedWords(strings)) { + newwords.addAll(wordnet.getBestSynonyms(wordnetpos,att)); + } + } + } + if (newwords.isEmpty()) { + newwords.add(slot.getWords().get(0)); + } + List<String> newwordslist = new ArrayList<String>(); + newwordslist.addAll(newwords); + slot.setWords(newwordslist); + } + } + } + // + + templates.add(temp); + } catch (java.lang.ClassCastException e) { + continue; + } + if (ONE_SCOPE_ONLY) { break; } + } + } + } + } + + if (clearAgain) { + parser.clear(g,parser.getTemps()); + } + // System.gc(); + + return templates; + } + + public Set<Template> buildTemplatesMultiThreaded(String s) {return buildTemplates(s,true);} + public Set<Template> buildTemplatesMultiThreaded(String s,boolean createSynonyms) { + boolean clearAgain = true; - + String tagged; if (UNTAGGED_INPUT) { s = pp.normalize(s); @@ -321,148 +327,153 @@ newtagged = pp.condenseNominals(pp.findNEs(tagged,s)); } else newtagged = pp.condenseNominals(tagged); - + newtagged = pp.condense(newtagged); logger.debug("Preprocessed: " + newtagged); - - parser.parseMultiThreaded(newtagged,g); - - if (parser.getDerivationTrees().isEmpty()) { - parser.clear(g,parser.getTemps()); - clearAgain = false; - logger.error("[Templator.java] '" + s + "' could not be parsed."); - } - else { - try { - parser.buildDerivedTreesMultiThreaded(g); - } catch (ParseException e) { - logger.error("[Templator.java] ParseException at '" + e.getMessage() + "'", e); - } - } - // build pairs <String,POStag> from tagged - Hashtable<String,String> postable = new Hashtable<String,String>(); - for (String st : newtagged.split(" ")) { + parser.parseMultiThreaded(newtagged,g); + + if (parser.getDerivationTrees().isEmpty()) { + parser.clear(g,parser.getTemps()); + clearAgain = false; + logger.error("[Templator.java] '" + s + "' could not be parsed."); + } + else { + try { + parser.buildDerivedTreesMultiThreaded(g); + } catch (ParseException e) { + logger.error("[Templator.java] ParseException at '" + e.getMessage() + "'", e); + } + } + + // build pairs <String,POStag> from tagged + Hashtable<String,String> postable = new Hashtable<String,String>(); + for (String st : newtagged.split(" ")) { postable.put(st.substring(0,st.indexOf("/")).toLowerCase(),st.substring(st.indexOf("/")+1));; } - // - - drses = new HashSet<DRS>(); - templates = new HashSet<Template>(); - -// ExecutorService threadPool = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()); -// for (Dude dude : p.getDudes()) { -// threadPool.execute(new DudeProcessor(dude, postable)); -// } -// threadPool.shutdown(); -// while(!threadPool.isTerminated()){} - - for (Dude dude : parser.getDudes()) { - - UDRS udrs = d2u.convert(dude); - if (udrs != null) { - - for (DRS drs : udrs.initResolve()) { - - List<Slot> slots = new ArrayList<Slot>(); - slots.addAll(dude.getSlots()); - d2s.setSlots(slots); - d2s.redundantEqualRenaming(drs); - - if (!containsModuloRenaming(drses,drs)) { -// // DEBUG - logger.debug(dude); - logger.debug(drs); - for 
(Slot sl : slots) { - logger.debug(sl.toString()); - } -// // - drses.add(drs); - - try { - Template temp = d2s.convert(drs,slots); - if (temp == null) { continue; } - temp = temp.checkandrefine(); - if (temp == null) { continue; } - - - if (USE_WORDNET) { // find WordNet synonyms - List<String> newwords; - String word; - String pos; - for (Slot slot : temp.getSlots()) { - if (!slot.getWords().isEmpty()) { - - word = slot.getWords().get(0); - pos = postable.get(word.toLowerCase().replace(" ","_")); - - POS wordnetpos = null; - if (pos != null) { - if (equalsOneOf(pos,noun)) { - wordnetpos = POS.NOUN; - } - else if (equalsOneOf(pos,adjective)) { - wordnetpos = POS.ADJECTIVE; - } - else if (equalsOneOf(pos,verb)) { - wordnetpos = POS.VERB; - } - } - - List<String> strings = new ArrayList<String>(); - if (wordnetpos != null && wordnetpos.equals(POS.ADJECTIVE)) { - strings = wordnet.getAttributes(word); - } - - newwords = new ArrayList<String>(); - newwords.addAll(slot.getWords()); - newwords.addAll(strings); - - if (wordnetpos != null && !slot.getSlotType().equals(SlotType.RESOURCE)) { - newwords.addAll(wordnet.getBestSynonyms(wordnetpos,getLemmatizedWord(word))); - for (String att : getLemmatizedWords(strings)) { - newwords.addAll(wordnet.getBestSynonyms(wordnetpos,att)); - } - } - if (newwords.isEmpty()) { - newwords.add(slot.getWords().get(0)); - } - List<String> newwordslist = new ArrayList<String>(); - newwordslist.addAll(newwords); - slot.setWords(newwordslist); - } - } - } - // - - templates.add(temp); - } catch (java.lang.ClassCastException e) { - continue; - } - if (ONE_SCOPE_ONLY) { break; } - } - } - + // + + drses = new HashSet<DRS>(); + templates = new HashSet<Template>(); + + // ExecutorService threadPool = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()); + // for (Dude dude : p.getDudes()) { + // threadPool.execute(new DudeProcessor(dude, postable)); + // } + // threadPool.shutdown(); + // while(!threadPool.isTerminated()){} + + for (Dude dude : parser.getDudes()) { + + UDRS udrs = d2u.convert(dude); + if (udrs != null) { + + for (DRS drs : udrs.initResolve()) { + + List<Slot> slots = new ArrayList<Slot>(); + slots.addAll(dude.getSlots()); + d2s.setSlots(slots); + d2s.redundantEqualRenaming(drs); + + if (!containsModuloRenaming(drses,drs)) { + // // DEBUG + logger.debug(dude); + logger.debug(drs); + for (Slot sl : slots) { + logger.debug(sl.toString()); + } + // // + drses.add(drs); + + try { + Template temp = d2s.convert(drs,slots); + if (temp == null) { continue; } + temp = temp.checkandrefine(); + if (temp == null) { continue; } + + + if (USE_WORDNET) { // find WordNet synonyms + List<String> newwords; + String word; + String pos; + for (Slot slot : temp.getSlots()) { + if (!slot.getWords().isEmpty()) { + + word = slot.getWords().get(0); + pos = postable.get(word.toLowerCase().replace(" ","_")); + + POS wordnetpos = null; + if (pos != null) { + if (equalsOneOf(pos,noun)) { + wordnetpos = POS.NOUN; + } + else if (equalsOneOf(pos,adjective)) { + wordnetpos = POS.ADJECTIVE; + } + else if (equalsOneOf(pos,verb)) { + wordnetpos = POS.VERB; + } + } + + List<String> strings = new ArrayList<String>(); + if(createSynonyms) + { + if (wordnetpos != null && wordnetpos.equals(POS.ADJECTIVE)) { + strings = wordnet.getAttributes(word); + } + } + newwords = new ArrayList<String>(); + newwords.addAll(slot.getWords()); + newwords.addAll(strings); + + if(createSynonyms) + { + if (wordnetpos != null && !slot.getSlotType().equals(SlotType.RESOURCE)) { + 
newwords.addAll(wordnet.getBestSynonyms(wordnetpos,getLemmatizedWord(word))); + for (String att : getLemmatizedWords(strings)) { + newwords.addAll(wordnet.getBestSynonyms(wordnetpos,att)); + } + } + } + if (newwords.isEmpty()) { + newwords.add(slot.getWords().get(0)); + } + List<String> newwordslist = new ArrayList<String>(); + newwordslist.addAll(newwords); + slot.setWords(newwordslist); + } + } + } + // + + templates.add(temp); + } catch (java.lang.ClassCastException e) { + continue; + } + if (ONE_SCOPE_ONLY) { break; } + } + } + + } + } + + + if (clearAgain) { + parser.clear(g,parser.getTemps()); + } + // System.gc(); + + return templates; } - } - - - if (clearAgain) { - parser.clear(g,parser.getTemps()); - } -// System.gc(); - - return templates; - } - + public String getTaggedInput() { return taggedInput; } - + public List<String> getUnknownWords(){ return parser.getUnknownWords(); } - + private List<String> getLemmatizedWords(List<String> words){ List<String> stemmed = new ArrayList<String>(); for(String word : words){ @@ -472,15 +483,15 @@ } else { stemmed.add(getLemmatizedWord(word)); } - + } return stemmed; } - + private String getLemmatizedWord(String word){ return lem.stem(word); } - + private boolean containsModuloRenaming(Set<DRS> drses, DRS drs) { for (DRS d : drses) { @@ -490,7 +501,7 @@ } return false; } - + private boolean equalsOneOf(String string,String[] strings) { for (String s : strings) { if (string.equals(s)) { @@ -499,30 +510,30 @@ } return false; } - + private String extractSentence(String taggedSentence){ - int pos = taggedSentence.indexOf("/"); - while(pos != -1){ - String first = taggedSentence.substring(0, pos); - int endPos = taggedSentence.substring(pos).indexOf(" "); - if(endPos == -1){ - endPos = taggedSentence.substring(pos).length(); - } - String rest = taggedSentence.substring(pos + endPos); - - taggedSentence = first + rest; - pos = taggedSentence.indexOf("/"); - - } - return taggedSentence; - - } - + int pos = taggedSentence.indexOf("/"); + while(pos != -1){ + String first = taggedSentence.substring(0, pos); + int endPos = taggedSentence.substring(pos).indexOf(" "); + if(endPos == -1){ + endPos = taggedSentence.substring(pos).length(); + } + String rest = taggedSentence.substring(pos + endPos); + + taggedSentence = first + rest; + pos = taggedSentence.indexOf("/"); + + } + return taggedSentence; + + } + class DudeProcessor implements Runnable{ - + private Dude dude; private Hashtable<String,String> postable; - + public DudeProcessor(Dude dude, Hashtable<String,String> postable) { this.dude = dude; this.postable = postable; @@ -530,94 +541,94 @@ @Override public void run() { - UDRS udrs = d2u.convert(dude); - if (udrs != null) { - - for (DRS drs : udrs.initResolve()) { - - List<Slot> slots = new ArrayList<Slot>(); - slots.addAll(dude.getSlots()); - d2s.setSlots(slots); - d2s.redundantEqualRenaming(drs); - - if (!containsModuloRenaming(drses,drs)) { -// // DEBUG - if (VERBOSE) { - logger.debug(dude); - logger.debug(drs); - for (Slot sl : slots) { - logger.debug(sl.toString()); - } - } -// // - drses.add(drs); - - try { - Template temp = d2s.convert(drs,slots); - temp = temp.checkandrefine(); - if (temp == null) { - continue; - } - - if (USE_WORDNET) { // find WordNet synonyms - List<String> newwords; - String word; - String pos; - for (Slot slot : temp.getSlots()) { - if (!slot.getWords().isEmpty()) { - - word = slot.getWords().get(0); - pos = postable.get(word.toLowerCase().replace(" ","_")); - - POS wordnetpos = null; - if (pos != null) { - if 
(equalsOneOf(pos,noun)) { - wordnetpos = POS.NOUN; - } - else if (equalsOneOf(pos,adjective)) { - wordnetpos = POS.ADJECTIVE; - } - else if (equalsOneOf(pos,verb)) { - wordnetpos = POS.VERB; - } - } - - List<String> strings = new ArrayList<String>(); - if (wordnetpos != null && wordnetpos.equals(POS.ADJECTIVE)) { - strings = wordnet.getAttributes(word); - } - - newwords = new ArrayList<String>(); - newwords.addAll(slot.getWords()); - newwords.addAll(strings); - - if (wordnetpos != null && !slot.getSlotType().equals(SlotType.RESOURCE)) { - newwords.addAll(wordnet.getBestSynonyms(wordnetpos,getLemmatizedWord(word))); - for (String att : getLemmatizedWords(strings)) { - newwords.addAll(wordnet.getBestSynonyms(wordnetpos,att)); - } - } - if (newwords.isEmpty()) { - newwords.add(slot.getWords().get(0)); - } - List<String> newwordslist = new ArrayList<String>(); - newwordslist.addAll(newwords); - slot.setWords(newwordslist); - } - } - } - // - - templates.add(temp); - } catch (java.lang.ClassCastException e) { - continue; - } - if (ONE_SCOPE_ONLY) { break; } - } - } - } + UDRS udrs = d2u.convert(dude); + if (udrs != null) { + + for (DRS drs : udrs.initResolve()) { + + List<Slot> slots = new ArrayList<Slot>(); + slots.addAll(dude.getSlots()); + d2s.setSlots(slots); + d2s.redundantEqualRenaming(drs); + + if (!containsModuloRenaming(drses,drs)) { + // // DEBUG + if (VERBOSE) { + logger.debug(dude); + logger.debug(drs); + for (Slot sl : slots) { + logger.debug(sl.toString()); + } + } + // // + drses.add(drs); + + try { + Template temp = d2s.convert(drs,slots); + temp = temp.checkandrefine(); + if (temp == null) { + continue; + } + + if (USE_WORDNET) { // find WordNet synonyms + List<String> newwords; + String word; + String pos; + for (Slot slot : temp.getSlots()) { + if (!slot.getWords().isEmpty()) { + + word = slot.getWords().get(0); + pos = postable.get(word.toLowerCase().replace(" ","_")); + + POS wordnetpos = null; + if (pos != null) { + if (equalsOneOf(pos,noun)) { + wordnetpos = POS.NOUN; + } + else if (equalsOneOf(pos,adjective)) { + wordnetpos = POS.ADJECTIVE; + } + else if (equalsOneOf(pos,verb)) { + wordnetpos = POS.VERB; + } + } + + List<String> strings = new ArrayList<String>(); + if (wordnetpos != null && wordnetpos.equals(POS.ADJECTIVE)) { + strings = wordnet.getAttributes(word); + } + + newwords = new ArrayList<String>(); + newwords.addAll(slot.getWords()); + newwords.addAll(strings); + + if (wordnetpos != null && !slot.getSlotType().equals(SlotType.RESOURCE)) { + newwords.addAll(wordnet.getBestSynonyms(wordnetpos,getLemmatizedWord(word))); + for (String att : getLemmatizedWords(strings)) { + newwords.addAll(wordnet.getBestSynonyms(wordnetpos,att)); + } + } + if (newwords.isEmpty()) { + newwords.add(slot.getWords().get(0)); + } + List<String> newwordslist = new ArrayList<String>(); + newwordslist.addAll(newwords); + slot.setWords(newwordslist); + } + } + } + // + + templates.add(temp); + } catch (java.lang.ClassCastException e) { + continue; + } + if (ONE_SCOPE_ONLY) { break; } + } + } + } } - + } } Modified: branches/hmm/components-ext/src/main/java/org/dllearner/common/index/HierarchicalIndex.java =================================================================== --- branches/hmm/components-ext/src/main/java/org/dllearner/common/index/HierarchicalIndex.java 2012-09-20 15:44:22 UTC (rev 3847) +++ branches/hmm/components-ext/src/main/java/org/dllearner/common/index/HierarchicalIndex.java 2012-09-21 14:12:34 UTC (rev 3848) @@ -1,6 +1,8 @@ package org.dllearner.common.index; import 
java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; import java.util.List; public class HierarchicalIndex implements Index{ @@ -48,9 +50,15 @@ @Override public IndexResultSet getResourcesWithScores(String queryString, int limit, int offset) { - IndexResultSet rs = primaryIndex.getResourcesWithScores(queryString, limit, offset); + return getResourcesWithScores(queryString, limit, DEFAULT_OFFSET,Collections.<String>emptyList()); + } + + @Override public IndexResultSet getResourcesWithScores(String queryString, int limit, int offset, + Collection<String> additionalFields) + { + IndexResultSet rs = primaryIndex.getResourcesWithScores(queryString, limit, offset, additionalFields); if(rs.getItems().size() < limit){ - rs.add(secondaryIndex.getResourcesWithScores(queryString, limit-rs.getItems().size(), offset)); + rs.add(secondaryIndex.getResourcesWithScores(queryString, limit-rs.getItems().size(), offset,additionalFields)); } return rs; } Modified: branches/hmm/components-ext/src/main/java/org/dllearner/common/index/Index.java =================================================================== --- branches/hmm/components-ext/src/main/java/org/dllearner/common/index/Index.java 2012-09-20 15:44:22 UTC (rev 3847) +++ branches/hmm/components-ext/src/main/java/org/dllearner/common/index/Index.java 2012-09-21 14:12:34 UTC (rev 3848) @@ -1,5 +1,6 @@ package org.dllearner.common.index; +import java.util.Collection; import java.util.List; import java.util.Map; @@ -10,4 +11,5 @@ IndexResultSet getResourcesWithScores(String queryString); IndexResultSet getResourcesWithScores(String queryString, int limit); IndexResultSet getResourcesWithScores(String queryString, int limit, int offset); + IndexResultSet getResourcesWithScores(String queryString, int limit, int offset, Collection<String> additionalFields); } Modified: branches/hmm/components-ext/src/main/java/org/dllearner/common/index/IndexResultItem.java =================================================================== --- branches/hmm/components-ext/src/main/java/org/dllearner/common/index/IndexResultItem.java 2012-09-20 15:44:22 UTC (rev 3847) +++ branches/hmm/components-ext/src/main/java/org/dllearner/common/index/IndexResultItem.java 2012-09-21 14:12:34 UTC (rev 3848) @@ -1,26 +1,34 @@ package org.dllearner.common.index; +import java.util.Collections; +import java.util.Map; + public class IndexResultItem { + private final String uri; + private final String label; + private final float score; + private final Map<String,? extends Object> fields; - private String uri; - private String label; - private float score; + public IndexResultItem(String uri, String label, float score) + {this(uri,label,score,Collections.<String,Object>emptyMap());} - public IndexResultItem(String uri, String label, float score) { + public IndexResultItem(String uri, String label, float score,Map<String,? extends Object> fields) + { this.uri = uri; this.label = label; this.score = score; + if(fields==null) throw new AssertionError("fields null"); + this.fields = fields; } - public String getUri() { - return uri; - } + public String getUri() {return uri;} + public String getLabel() {return label; } + public float getScore() {return score;} + public Map<String,? 
extends Object> getFields() {return fields;} - public String getLabel() { - return label; + @Override public String toString() + { + // TODO Auto-generated method stub + return "label:" + label + "--uri:" + uri + "--fields:" + fields; } - - public float getScore() { - return score; - } } Modified: branches/hmm/components-ext/src/main/java/org/dllearner/common/index/SOLRIndex.java =================================================================== --- branches/hmm/components-ext/src/main/java/org/dllearner/common/index/SOLRIndex.java 2012-09-20 15:44:22 UTC (rev 3847) +++ branches/hmm/components-ext/src/main/java/org/dllearner/common/index/SOLRIndex.java 2012-09-21 14:12:34 UTC (rev 3848) @@ -2,8 +2,11 @@ import java.net.MalformedURLException; import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; import java.util.List; - +import java.util.Map; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrQuery.ORDER; import org.apache.solr.client.solrj.SolrServerException; @@ -15,19 +18,19 @@ import org.apache.solr.common.params.ModifiableSolrParams; public class SOLRIndex implements Index{ - -private CommonsHttpSolrServer server; - + + private CommonsHttpSolrServer server; + private static final int DEFAULT_LIMIT = 10; private static final int DEFAULT_OFFSET = 0; - + private String primarySearchField; private String secondarySearchField; - + private String sortField; - + private boolean restrictiveSearch = true; - + public SOLRIndex(String solrServerURL){ try { server = new CommonsHttpSolrServer(solrServerURL); @@ -36,20 +39,20 @@ e.printStackTrace(); } } - + public void setSearchFields(String primarySearchField, String secondarySearchField){ this.primarySearchField = primarySearchField; this.secondarySearchField = secondarySearchField; } - + public void setPrimarySearchField(String primarySearchField) { this.primarySearchField = primarySearchField; } - + public void setSecondarySearchField(String secondarySearchField) { this.secondarySearchField = secondarySearchField; } - + @Override public List<String> getResources(String queryString) { return getResources(queryString, DEFAULT_LIMIT); @@ -91,9 +94,12 @@ } @Override - public IndexResultSet getResourcesWithScores(String queryString, int limit, int offset) { - IndexResultSet rs = new IndexResultSet(); - + public IndexResultSet getResourcesWithScores(String queryString, int limit, int offset) + {return getResourcesWithScores(queryString,limit,offset,Collections.<String>emptyList());} + + public IndexResultSet getResourcesWithScores(String queryString, int limit, int offset, Collection<String> additionalFields) + { + IndexResultSet rs = new IndexResultSet(); QueryResponse response; try { String solrString = queryString; @@ -112,36 +118,43 @@ } solrString += ")"; } - + } else { solrString += queryString; } } SolrQuery query = new SolrQuery(solrString); - query.setRows(limit); - query.setStart(offset); - if(sortField != null){ - query.addSortField(sortField, ORDER.desc); - } - query.addField("score"); + query.setRows(limit); + query.setStart(offset); + if(sortField != null){ + query.addSortField(sortField, ORDER.desc); + } + query.addField("score"); response = server.query(query); SolrDocumentList docList = response.getResults(); - - for(SolrDocument d : docList){ + + for(SolrDocument d : docList) + { + Map<String,Object> fields = new HashMap<String,Object>(); + for(String field: additionalFields) + { + Object o = d.get(field); + if(o!=null) 
{fields.put(field,o);} + } float score = 0; if(d.get("score") instanceof ArrayList){ score = ((Float)((ArrayList)d.get("score")).get(1)); } else { score = (Float) d.get("score"); } - rs.addItem(new IndexResultItem((String) d.get("uri"), (String) d.get("label"), score)); + rs.addItem(new IndexResultItem((String) d.get("uri"), (String) d.get("label"), score,fields)); } } catch (SolrServerException e) { e.printStackTrace(); } return rs; } - + public void setSortField(String sortField){ this.sortField = sortField; } Modified: branches/hmm/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java =================================================================== --- branches/hmm/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java 2012-09-20 15:44:22 UTC (rev 3847) +++ branches/hmm/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java 2012-09-21 14:12:34 UTC (rev 3848) @@ -1,6 +1,7 @@ package org.dllearner.common.index; import java.util.ArrayList; +import java.util.Collection; import java.util.List; import org.dllearner.kb.sparql.ExtractionDBCache; @@ -150,5 +151,11 @@ public Model getModel() { return model; } + + @Override public IndexResultSet getResourcesWithScores(String queryString, int limit, int offset, + Collection<String> additionalFields) + { + throw new UnsupportedOperationException("TODO: implement this later"); + } } Modified: branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java =================================================================== --- branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-09-20 15:44:22 UTC (rev 3847) +++ branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-09-21 14:12:34 UTC (rev 3848) @@ -806,7 +806,7 @@ Index propertiesIndex = new SOLRIndex("http://dbpedia.aksw.org:8080/solr/dbpedia_properties"); SOLRIndex boa_propertiesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/boa_fact_detail"); boa_propertiesIndex.setSortField("boa-score"); - propertiesIndex = new HierarchicalIndex(boa_propertiesIndex, propertiesIndex); +// propertiesIndex = new HierarchicalIndex(boa_propertiesIndex, propertiesIndex); MappingBasedIndex mappingIndex= new MappingBasedIndex( SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("test/dbpedia_class_mappings.txt").getPath(), SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("test/dbpedia_resource_mappings.txt").getPath(), This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
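The revision above (r3848, per the diff headers) extends the Index interface with a four-argument getResourcesWithScores overload that threads a caller-chosen set of additional stored fields through to Solr and back out via IndexResultItem.getFields(). One detail worth noting: the retained three-argument overload in HierarchicalIndex now delegates with DEFAULT_OFFSET instead of the offset it was given, silently discarding the caller's offset, which looks like an oversight. A minimal usage sketch of the new overload, assuming the Solr cores named elsewhere in this log are reachable (the query string and field choice are illustrative only):

    import java.util.Arrays;
    import org.dllearner.common.index.HierarchicalIndex;
    import org.dllearner.common.index.IndexResultItem;
    import org.dllearner.common.index.IndexResultSet;
    import org.dllearner.common.index.SOLRIndex;

    public class AdditionalFieldsDemo {
        public static void main(String[] args) {
            // Primary index first: HierarchicalIndex only consults the secondary
            // index when the primary returns fewer hits than the limit.
            SOLRIndex boaIndex = new SOLRIndex("http://139.18.2.173:8080/solr/boa_fact_detail");
            SOLRIndex propertiesIndex = new SOLRIndex("http://dbpedia.aksw.org:8080/solr/dbpedia_properties");
            HierarchicalIndex index = new HierarchicalIndex(boaIndex, propertiesIndex);

            // Up to 10 hits from offset 0; request "boa-score" (the sort field
            // used in the tests) as an additional stored field.
            IndexResultSet rs = index.getResourcesWithScores("published", 10, 0,
                    Arrays.asList("boa-score"));
            for (IndexResultItem item : rs.getItems()) {
                // getFields() is never null; fields missing in a document are simply absent.
                Object boaScore = item.getFields().get("boa-score");
                System.out.println(item.getUri() + " " + item.getLabel() + " boa-score=" + boaScore);
            }
        }
    }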
From: <ki...@us...> - 2012-09-20 15:44:31
Revision: 3847 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3847&view=rev Author: kirdie Date: 2012-09-20 15:44:22 +0000 (Thu, 20 Sep 2012) Log Message: ----------- now passing the labels as well to the hmm. Modified Paths: -------------- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java Modified: branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-09-19 15:15:56 UTC (rev 3846) +++ branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-09-20 15:44:22 UTC (rev 3847) @@ -1,7 +1,9 @@ package org.dllearner.algorithm.tbsl.learning; import hmm.HiddenMarkovModel; +import hmm.ResourceInfo; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; @@ -511,19 +513,21 @@ Query query = template.getQuery(); double score = 0; - Map<List<String>,Collection<String>> segmentToURIs = new HashMap<List<String>,Collection<String>>(); + Map<List<String>,List<ResourceInfo>> segmentToURIs = new HashMap<List<String>,List<ResourceInfo>>(); for(Slot slot: template.getSlots()) { List<String> segment = new LinkedList<String>(); - segment.add(slot.getWords().get(0)); // TODO: split it up? - - Set<String> uris = new HashSet<String>(); - + segment.addAll(Arrays.asList(slot.getWords().get(0).split("\\s"))); + List<ResourceInfo> resourceInfos = new LinkedList<ResourceInfo>(); + for(IndexResultItem item : getIndexResultItems(slot)) { - uris.add(item.getUri()); + // if this gets used at another place, create a function IndexResultItemToResourceInfo() + ResourceInfo info = new ResourceInfo(); + info.setUri(item.getUri()); + info.setLabel(item.getLabel()); } - segmentToURIs.put(segment,uris); + segmentToURIs.put(segment,resources); } HiddenMarkovModel hmm = new HiddenMarkovModel(); hmm.initialization(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
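As committed, the new slot-mapping hunk in r3847 above cannot compile: each freshly built ResourceInfo is never added to resourceInfos, and the map is then filled from an undefined variable named resources. A corrected version of the block, keeping the names and types from the diff (the repairs are flagged in the comments), would presumably read:

    Map<List<String>, List<ResourceInfo>> segmentToURIs =
            new HashMap<List<String>, List<ResourceInfo>>();
    for (Slot slot : template.getSlots()) {
        List<String> segment = new LinkedList<String>();
        segment.addAll(Arrays.asList(slot.getWords().get(0).split("\\s")));

        List<ResourceInfo> resourceInfos = new LinkedList<ResourceInfo>();
        for (IndexResultItem item : getIndexResultItems(slot)) {
            // if this gets used at another place, create a function IndexResultItemToResourceInfo()
            ResourceInfo info = new ResourceInfo();
            info.setUri(item.getUri());
            info.setLabel(item.getLabel());
            resourceInfos.add(info); // this add is missing in the committed hunk
        }
        segmentToURIs.put(segment, resourceInfos); // the commit writes the undefined "resources" here
    }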
From: <ki...@us...> - 2012-09-19 15:16:08
Revision: 3846 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3846&view=rev Author: kirdie Date: 2012-09-19 15:15:56 +0000 (Wed, 19 Sep 2012) Log Message: ----------- started implementing using hmm for disambiguation Modified Paths: -------------- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java Modified: branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-09-19 15:12:20 UTC (rev 3845) +++ branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-09-19 15:15:56 UTC (rev 3846) @@ -1,27 +1,19 @@ package org.dllearner.algorithm.tbsl.learning; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.net.URL; +import hmm.HiddenMarkovModel; import java.util.ArrayList; import java.util.Collection; -import java.util.Collections; -import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; -import java.util.Iterator; +import java.util.LinkedList; import java.util.List; import java.util.Map; -import java.util.Map.Entry; import java.util.Set; +import java.util.SortedMap; import java.util.SortedSet; import java.util.TreeMap; import java.util.TreeSet; -import java.util.concurrent.Callable; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; +import org.apache.commons.collections15.MultiMap; import org.apache.log4j.Logger; import org.dllearner.algorithm.tbsl.nlp.Lemmatizer; import org.dllearner.algorithm.tbsl.nlp.LingPipeLemmatizer; @@ -31,13 +23,7 @@ import org.dllearner.algorithm.tbsl.nlp.WordNet; import org.dllearner.algorithm.tbsl.sparql.Allocation; import org.dllearner.algorithm.tbsl.sparql.Query; -import org.dllearner.algorithm.tbsl.sparql.SPARQL_Filter; -import org.dllearner.algorithm.tbsl.sparql.SPARQL_Pair; -import org.dllearner.algorithm.tbsl.sparql.SPARQL_PairType; -import org.dllearner.algorithm.tbsl.sparql.SPARQL_Property; import org.dllearner.algorithm.tbsl.sparql.SPARQL_QueryType; -import org.dllearner.algorithm.tbsl.sparql.SPARQL_Triple; -import org.dllearner.algorithm.tbsl.sparql.SPARQL_Value; import org.dllearner.algorithm.tbsl.sparql.Slot; import org.dllearner.algorithm.tbsl.sparql.SlotType; import org.dllearner.algorithm.tbsl.sparql.Template; @@ -46,13 +32,10 @@ import org.dllearner.algorithm.tbsl.util.Knowledgebase; import org.dllearner.algorithm.tbsl.util.PopularityMap; import org.dllearner.algorithm.tbsl.util.PopularityMap.EntityType; -import org.dllearner.algorithm.tbsl.util.Similarity; -import org.dllearner.algorithm.tbsl.util.UnknownPropertyHelper.SymPropertyDirection; import org.dllearner.common.index.Index; import org.dllearner.common.index.IndexResultItem; import org.dllearner.common.index.IndexResultSet; import org.dllearner.common.index.MappingBasedIndex; -import org.dllearner.common.index.SOLRIndex; import org.dllearner.common.index.SPARQLDatatypePropertiesIndex; import org.dllearner.common.index.SPARQLIndex; import 
org.dllearner.common.index.SPARQLObjectPropertiesIndex; @@ -63,19 +46,13 @@ import org.dllearner.core.ComponentInitException; import org.dllearner.core.LearningProblem; import org.dllearner.core.SparqlQueryLearningAlgorithm; -import org.dllearner.core.owl.Description; -import org.dllearner.core.owl.NamedClass; -import org.dllearner.core.owl.ObjectProperty; -import org.dllearner.core.owl.Thing; import org.dllearner.kb.LocalModelBasedSparqlEndpointKS; import org.dllearner.kb.SparqlEndpointKS; import org.dllearner.kb.sparql.ExtractionDBCache; import org.dllearner.kb.sparql.SparqlEndpoint; import org.dllearner.kb.sparql.SparqlQuery; import org.dllearner.reasoning.SPARQLReasoner; -import org.ini4j.InvalidFileFormatException; import org.ini4j.Options; - import com.hp.hpl.jena.ontology.OntModelSpec; import com.hp.hpl.jena.query.QueryExecutionFactory; import com.hp.hpl.jena.query.QueryFactory; @@ -85,10 +62,6 @@ import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.ModelFactory; import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP; -import com.hp.hpl.jena.sparql.expr.ExprAggregator; -import com.hp.hpl.jena.sparql.expr.ExprVar; -import com.hp.hpl.jena.sparql.expr.aggregate.AggCount; -import com.hp.hpl.jena.sparql.expr.aggregate.Aggregator; import com.jamonapi.Monitor; import com.jamonapi.MonitorFactory; @@ -516,358 +489,88 @@ return relevantKeywords; } - private SortedSet<WeightedQuery> getWeightedSPARQLQueries(Set<Template> templates){ - logger.debug("Generating SPARQL query candidates..."); - - Map<Slot, Set<Allocation>> slot2Allocations = new TreeMap<Slot, Set<Allocation>>(new Comparator<Slot>() { - - @Override - public int compare(Slot o1, Slot o2) { - if(o1.getSlotType() == o2.getSlotType()){ - return o1.getToken().compareTo(o2.getToken()); - } else { - return -1; + // just for testing the HMM integration, getWeightedSPARQLQueriesOld is the original one + private SortedSet<WeightedQuery> getWeightedSPARQLQueries(Set<Template> templates) + { + // for testing + for(Template template: templates) + { + { + ArrayList<String> keywords = new ArrayList<String>(); + for(Slot slot: template.getSlots()) + { + keywords.add(slot.getWords().get(0)); } + if(template.getSlots().size()!=3) {continue;} + if(!keywords.contains("Mean Hamster Software")) {continue;} + if(!keywords.contains("published")) {continue;} + System.out.println("\"keywords\": "+keywords); } - }); - slot2Allocations = Collections.synchronizedMap(new HashMap<Slot, Set<Allocation>>()); - - - SortedSet<WeightedQuery> allQueries = new TreeSet<WeightedQuery>(); - - Set<Allocation> allocations; - - for(Template t : templates){ - logger.info("Processing template:\n" + t.toString()); - allocations = new TreeSet<Allocation>(); - boolean containsRegex = t.getQuery().toString().toLowerCase().contains("(regex("); - - ExecutorService executor = Executors.newFixedThreadPool(t.getSlots().size()); - List<Future<Map<Slot, SortedSet<Allocation>>>> list = new ArrayList<Future<Map<Slot, SortedSet<Allocation>>>>(); - - long startTime = System.currentTimeMillis(); - - for (Slot slot : t.getSlots()) { - if(!slot2Allocations.containsKey(slot)){//System.out.println(slot + ": " + slot.hashCode());System.out.println(slot2Allocations); - Callable<Map<Slot, SortedSet<Allocation>>> worker = new SlotProcessor(slot); - Future<Map<Slot, SortedSet<Allocation>>> submit = executor.submit(worker); - list.add(submit); - } - } - - for (Future<Map<Slot, SortedSet<Allocation>>> future : list) { - try { - Map<Slot, SortedSet<Allocation>> result = 
future.get(); - Entry<Slot, SortedSet<Allocation>> item = result.entrySet().iterator().next(); - slot2Allocations.put(item.getKey(), item.getValue()); - } catch (InterruptedException e) { - e.printStackTrace(); - } catch (ExecutionException e) { - e.printStackTrace(); + System.out.println(template); + SortedSet<WeightedQuery> queries = new TreeSet<WeightedQuery>(); + Query query = template.getQuery(); + double score = 0; + + Map<List<String>,Collection<String>> segmentToURIs = new HashMap<List<String>,Collection<String>>(); + for(Slot slot: template.getSlots()) + { + List<String> segment = new LinkedList<String>(); + segment.add(slot.getWords().get(0)); // TODO: split it up? + + Set<String> uris = new HashSet<String>(); + + for(IndexResultItem item : getIndexResultItems(slot)) + { + uris.add(item.getUri()); + } + segmentToURIs.put(segment,uris); + } + HiddenMarkovModel hmm = new HiddenMarkovModel(); + hmm.initialization(); + hmm.startMarkovModel(segmentToURIs,true); + MultiMap<Double,List<String>> paths = hmm.getPaths(); - executor.shutdown(); + // System.out.println(hmm.getPaths()); + // now feed the keywords into sadeeh's algorithm + // this yields paths with different probabilities + // HiddenMarkovModel HMM = new HiddenMarkovModel(); + // HMM.StartMarkovModel(); + // now replace the variables in the query with the candidates + // ranked list of the paths that generate the observation sequence - - /*for(Slot slot : t.getSlots()){ - allocations = slot2Allocations2.get(slot); - if(allocations == null){ - allocations = computeAllocations(slot, 10); - slot2Allocations2.put(slot, allocations); - } - slot2Allocations.put(slot, allocations); - - //for tests add the property URI with http://dbpedia.org/property/ namespace - //TODO should be replaced by usage of a separate SOLR index - Set<Allocation> tmp = new HashSet<Allocation>(); - if(slot.getSlotType() == SlotType.PROPERTY || slot.getSlotType() == SlotType.SYMPROPERTY){ - for(Allocation a : allocations){ - String uri = "http://dbpedia.org/property/" + a.getUri().substring(a.getUri().lastIndexOf("/")+1); - Allocation newA = new Allocation(uri, a.getSimilarity(), a.getProminence()); - newA.setScore(a.getScore()-0.000001); - tmp.add(newA); + for(Double d : paths.keySet()) + { + for(List<String> path : paths.get(d)) + { + Query q = new Query(query); + // TODO: which variable stands for which resource? 
do it randomly now to check if the replacement works and then correct the order later + System.out.println(q.getVariablesAsStringList()); + System.out.println(); + int i = 0; + for(String var : q.getVariablesAsStringList()) + { + q.replaceVarWithURI(var, path.get(i)); + i++; } - } - allocations.addAll(tmp); - }*/ - logger.debug("Time needed: " + (System.currentTimeMillis() - startTime) + "ms"); + System.out.println(q); - Set<WeightedQuery> queries = new HashSet<WeightedQuery>(); - Query cleanQuery = t.getQuery(); - queries.add(new WeightedQuery(cleanQuery)); - Set<WeightedQuery> tmp = new TreeSet<WeightedQuery>(); - List<Slot> sortedSlots = new ArrayList<Slot>(); - Set<Slot> classSlots = new HashSet<Slot>(); - for(Slot slot : t.getSlots()){ - if(slot.getSlotType() == SlotType.CLASS){ - sortedSlots.add(slot); - classSlots.add(slot); + WeightedQuery wQuery = new WeightedQuery(q, score); + queries.add(wQuery); } } - for(Slot slot : t.getSlots()){ - if(slot.getSlotType() == SlotType.PROPERTY || slot.getSlotType() == SlotType.OBJECTPROPERTY || slot.getSlotType() == SlotType.DATATYPEPROPERTY){ - sortedSlots.add(slot); - } - } - for(Slot slot : t.getSlots()){ - if(!sortedSlots.contains(slot)){ - sortedSlots.add(slot); - } - } - //add for each SYMPROPERTY Slot the reversed query - for(Slot slot : sortedSlots){ - for(WeightedQuery wQ : queries){ - if(slot.getSlotType() == SlotType.SYMPROPERTY || slot.getSlotType() == SlotType.OBJECTPROPERTY){ - Query reversedQuery = new Query(wQ.getQuery()); - reversedQuery.getTriplesWithVar(slot.getAnchor()).iterator().next().reverse(); - tmp.add(new WeightedQuery(reversedQuery)); - } - tmp.add(wQ); - } - queries.clear(); - queries.addAll(tmp); - tmp.clear(); - } + //System.exit(0); + return queries; + // >> SLOTS: + // y0: RESOURCE {Mean Hamster Software} + // p0: OBJECTPROPERTY {published,print} + // p1: CLASS {video games} - for(Slot slot : sortedSlots){ - if(!slot2Allocations.get(slot).isEmpty()){ - for(Allocation a : slot2Allocations.get(slot)){ - for(WeightedQuery query : queries){ - Query q = new Query(query.getQuery()); - boolean drop = false; - if(useDomainRangeRestriction){ - if(slot.getSlotType() == SlotType.PROPERTY || slot.getSlotType() == SlotType.SYMPROPERTY){ - for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ - String objectVar = triple.getValue().getName(); - String subjectVar = triple.getVariable().getName(); - // System.out.println(triple); - for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(objectVar)){ - // System.out.println(typeTriple); - if(true){//reasoner.isObjectProperty(a.getUri())){ - Description range = reasoner.getRange(new ObjectProperty(a.getUri())); - // System.out.println(a); - if(range != null){ - Set<Description> allRanges = new HashSet<Description>(); - SortedSet<Description> superClasses; - if(range instanceof NamedClass){ - superClasses = reasoner.getSuperClasses(range); - allRanges.addAll(superClasses); - } else { - for(Description nc : range.getChildren()){ - superClasses = reasoner.getSuperClasses(nc); - allRanges.addAll(superClasses); - } - } - allRanges.add(range); - allRanges.remove(new NamedClass(Thing.instance.getURI())); - - Set<Description> allTypes = new HashSet<Description>(); - String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); - Description type = new NamedClass(typeURI); - superClasses = reasoner.getSuperClasses(type); - allTypes.addAll(superClasses); - allTypes.add(type); - - if(!org.mindswap.pellet.utils.SetUtils.intersects(allRanges, 
allTypes)){ - drop = true; - } - } - } else { - drop = true; - } - - } - for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(subjectVar)){ - Description domain = reasoner.getDomain(new ObjectProperty(a.getUri())); - // System.out.println(a); - if(domain != null){ - Set<Description> allDomains = new HashSet<Description>(); - SortedSet<Description> superClasses; - if(domain instanceof NamedClass){ - superClasses = reasoner.getSuperClasses(domain); - allDomains.addAll(superClasses); - } else { - for(Description nc : domain.getChildren()){ - superClasses = reasoner.getSuperClasses(nc); - allDomains.addAll(superClasses); - } - } - allDomains.add(domain); - allDomains.remove(new NamedClass(Thing.instance.getURI())); - - Set<Description> allTypes = new HashSet<Description>(); - String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); - Description type = new NamedClass(typeURI); - superClasses = reasoner.getSuperClasses(type); - allTypes.addAll(superClasses); - allTypes.add(type); - - if(!org.mindswap.pellet.utils.SetUtils.intersects(allDomains, allTypes)){ - drop = true; - } else { - - } - } - } - } - } - } - - if(!drop){ - if(slot.getSlotType() == SlotType.RESOURCE){//avoid queries where predicate is data property and object resource->add REGEX filter in this case - for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ - SPARQL_Value object = triple.getValue(); - if(object.isVariable() && object.getName().equals(slot.getAnchor())){//only consider triple where SLOT is in object position - SPARQL_Property predicate = triple.getProperty(); - if(!predicate.isVariable()){//only consider triple where predicate is URI - String predicateURI = predicate.getName().replace("<", "").replace(">", ""); - if(isDatatypeProperty(predicateURI)){//if data property - q.addFilter(new SPARQL_Filter(new SPARQL_Pair( - object, "'" + slot.getWords().get(0) + "'", SPARQL_PairType.REGEX))); - } else { - q.replaceVarWithURI(slot.getAnchor(), a.getUri()); - } - } else { - q.replaceVarWithURI(slot.getAnchor(), a.getUri()); - } - } else { - q.replaceVarWithURI(slot.getAnchor(), a.getUri()); - } - } - } else { - q.replaceVarWithURI(slot.getAnchor(), a.getUri()); - } - WeightedQuery w = new WeightedQuery(q); - double newScore = query.getScore() + a.getScore(); - w.setScore(newScore); - w.addAllocations(query.getAllocations()); - w.addAllocation(a); - tmp.add(w); - } - - - } - } - //lower queries with FILTER-REGEX - if(containsRegex){ - for(WeightedQuery wQ : tmp){ - wQ.setScore(wQ.getScore() - 0.01); - } - } - - queries.clear(); - queries.addAll(tmp);//System.out.println(tmp); - tmp.clear(); - } else {//Add REGEX FILTER if resource slot is empty and predicate is datatype property - if(slot.getSlotType() == SlotType.RESOURCE){ - for(WeightedQuery query : queries){ - Query q = query.getQuery(); - for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ - SPARQL_Value object = triple.getValue(); - if(object.isVariable() && object.getName().equals(slot.getAnchor())){//only consider triple where SLOT is in object position - SPARQL_Property predicate = triple.getProperty(); - if(!predicate.isVariable()){//only consider triple where predicate is URI - String predicateURI = predicate.getName().replace("<", "").replace(">", ""); - if(isDatatypeProperty(predicateURI)){//if data property - q.addFilter(new SPARQL_Filter(new SPARQL_Pair( - object, "'" + slot.getWords().get(0) + "'", SPARQL_PairType.REGEX))); - } - } - } - } - - } - - } else { - if(slot.getSlotType() == 
SlotType.SYMPROPERTY){ - for(WeightedQuery wQ : queries){ - List<SPARQL_Triple> triples = wQ.getQuery().getTriplesWithVar(slot.getAnchor()); - for(SPARQL_Triple triple : triples){ - String typeVar; - String resourceURI; - SymPropertyDirection direction; - if(triple.getValue().isVariable()){ - direction = SymPropertyDirection.VAR_RIGHT; - typeVar = triple.getValue().getName(); - resourceURI = triple.getVariable().getName(); - } else { - direction = SymPropertyDirection.VAR_LEFT; - typeVar = triple.getVariable().getName(); - resourceURI = triple.getValue().getName(); - } - resourceURI = resourceURI.replace("<", "").replace(">", ""); - List<SPARQL_Triple> typeTriples = wQ.getQuery().getRDFTypeTriples(typeVar); - for(SPARQL_Triple typeTriple : typeTriples){ - String typeURI = typeTriple.getValue().getName().replace("<", "").replace(">", ""); - // List<Entry<String, Integer>> mostFrequentProperties = UnknownPropertyHelper.getMostFrequentProperties(endpoint, cache, typeURI, resourceURI, direction); - // for(Entry<String, Integer> property : mostFrequentProperties){ - // wQ.getQuery().replaceVarWithURI(slot.getAnchor(), property.getKey()); - // wQ.setScore(wQ.getScore() + 0.1); - // } - } - - } - } - } - } - // else if(slot.getSlotType() == SlotType.CLASS){ - // String token = slot.getWords().get(0); - // if(slot.getToken().contains("house")){ - // String regexToken = token.replace("houses", "").replace("house", "").trim(); - // try { - // Map<Slot, SortedSet<Allocation>> ret = new SlotProcessor(new Slot(null, SlotType.CLASS, Collections.singletonList("house"))).call(); - // SortedSet<Allocation> alloc = ret.entrySet().iterator().next().getValue(); - // if(alloc != null && !alloc.isEmpty()){ - // String uri = alloc.first().getUri(); - // for(WeightedQuery query : queries){ - // Query q = query.getQuery(); - // for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ - // SPARQL_Term subject = triple.getVariable(); - // SPARQL_Term object = new SPARQL_Term("desc"); - // object.setIsVariable(true); - // object.setIsURI(false); - // q.addCondition(new SPARQL_Triple(subject, new SPARQL_Property("<http://purl.org/goodrelations/v1#description>"), object)); - // q.addFilter(new SPARQL_Filter(new SPARQL_Pair( - // object, "'" + regexToken + "'", SPARQL_PairType.REGEX))); - // } - // q.replaceVarWithURI(slot.getAnchor(), uri); - // - // } - // } - // } catch (Exception e) { - // e.printStackTrace(); - // } - // } - // } - - - } - - } - for (Iterator<WeightedQuery> iterator = queries.iterator(); iterator.hasNext();) { - WeightedQuery wQ = iterator.next(); - if(dropZeroScoredQueries){ - if(wQ.getScore() <= 0){ - iterator.remove(); - } - } else { - if(t.getSlots().size()==0) throw new AssertionError("no slots for query "+wQ); - wQ.setScore(wQ.getScore()/t.getSlots().size()); - } - - } - allQueries.addAll(queries); - List<Query> qList = new ArrayList<Query>(); - for(WeightedQuery wQ : queries){//System.err.println(wQ.getQuery()); - qList.add(wQ.getQuery()); - } - template2Queries.put(t, qList); + // System.out.println(template); } - logger.debug("...done in "); - return allQueries; + // + return null; } private double getProminenceValue(String uri, SlotType type){ @@ -1142,101 +845,51 @@ } - class SlotProcessor implements Callable<Map<Slot, SortedSet<Allocation>>>{ - - private Slot slot; - - public SlotProcessor(Slot slot) { - this.slot = slot; - } - - @Override - public Map<Slot, SortedSet<Allocation>> call() throws Exception { - Map<Slot, SortedSet<Allocation>> result = new HashMap<Slot, 
SortedSet<Allocation>>(); - result.put(slot, computeAllocations(slot)); - return result; - } - - private SortedSet<Allocation> computeAllocations(Slot slot){ - logger.debug("Computing allocations for slot: " + slot); - SortedSet<Allocation> allocations = new TreeSet<Allocation>(); - - Index index = getIndexBySlotType(slot); - - IndexResultSet rs; - for(String word : slot.getWords()){ - rs = new IndexResultSet(); - if(mappingIndex != null){ - SlotType type = slot.getSlotType(); - if(type == SlotType.CLASS){ - rs.add(mappingIndex.getClassesWithScores(word)); - } else if(type == SlotType.PROPERTY || type == SlotType.SYMPROPERTY){ - rs.add(mappingIndex.getPropertiesWithScores(word)); - } else if(type == SlotType.DATATYPEPROPERTY){ - rs.add(mappingIndex.getDatatypePropertiesWithScores(word)); - } else if(type == SlotType.OBJECTPROPERTY){ - rs.add(mappingIndex.getObjectPropertiesWithScores(word)); - } else if(type == SlotType.RESOURCE || type == SlotType.UNSPEC){ - rs.add(mappingIndex.getResourcesWithScores(word)); - } + private Set<IndexResultItem> getIndexResultItems(Slot slot) + { +// List<String> uris = new LinkedList<String>(); + Set<IndexResultItem> indexResultItems = new HashSet<IndexResultItem>(); + + Index index = getIndexBySlotType(slot); + + for(String word : slot.getWords()) + { + IndexResultSet rs = new IndexResultSet(); + if(mappingIndex != null){ + SlotType type = slot.getSlotType(); + if(type == SlotType.CLASS){ + rs.add(mappingIndex.getClassesWithScores(word)); + } else if(type == SlotType.PROPERTY || type == SlotType.SYMPROPERTY){ + rs.add(mappingIndex.getPropertiesWithScores(word)); + } else if(type == SlotType.DATATYPEPROPERTY){ + rs.add(mappingIndex.getDatatypePropertiesWithScores(word)); + } else if(type == SlotType.OBJECTPROPERTY){ + rs.add(mappingIndex.getObjectPropertiesWithScores(word)); + } else if(type == SlotType.RESOURCE || type == SlotType.UNSPEC){ + rs.add(mappingIndex.getResourcesWithScores(word)); } - //use the non manual indexes only if mapping based resultset is not empty and option is set - if(!useManualMappingsIfExistOnly || rs.isEmpty()){ - if(slot.getSlotType() == SlotType.RESOURCE){ - rs.add(index.getResourcesWithScores(word, 20)); - } else { - if(slot.getSlotType() == SlotType.CLASS){ - word = PlingStemmer.stem(word); - } - rs.add(index.getResourcesWithScores(word, 20)); + } + //use the non manual indexes only if mapping based resultset is not empty and option is set + if(!useManualMappingsIfExistOnly || rs.isEmpty()){ + if(slot.getSlotType() == SlotType.RESOURCE){ + rs.add(index.getResourcesWithScores(word, 20)); + } else { + if(slot.getSlotType() == SlotType.CLASS){ + word = PlingStemmer.stem(word); } + rs.add(index.getResourcesWithScores(word, 20)); } - - - for(IndexResultItem item : rs.getItems()){ - double similarity = Similarity.getSimilarity(word, item.getLabel()); - // //get the labels of the redirects and compute the highest similarity - // if(slot.getSlotType() == SlotType.RESOURCE){ - // Set<String> labels = getRedirectLabels(item.getUri()); - // for(String label : labels){ - // double tmp = Similarity.getSimilarity(word, label); - // if(tmp > similarity){ - // similarity = tmp; - // } - // } - // } - double prominence = getProminenceValue(item.getUri(), slot.getSlotType()); - allocations.add(new Allocation(item.getUri(), prominence, similarity)); - } - } - - normProminenceValues(allocations); - - computeScore(allocations); - logger.debug("Found " + allocations.size() + " allocations for slot " + slot); - return new 
TreeSet<Allocation>(allocations); +// for(IndexResultItem item: rs.getItems()) +// { +// uris.add(item.getUri()); +// } + indexResultItems.addAll(rs.getItems()); } - - private Index getIndexBySlotType(Slot slot){ - Index index = null; - SlotType type = slot.getSlotType(); - if(type == SlotType.CLASS){ - index = classesIndex; - } else if(type == SlotType.PROPERTY || type == SlotType.SYMPROPERTY){ - index = propertiesIndex; - } else if(type == SlotType.DATATYPEPROPERTY){ - index = datatypePropertiesIndex; - } else if(type == SlotType.OBJECTPROPERTY){ - index = objectPropertiesIndex; - } else if(type == SlotType.RESOURCE || type == SlotType.UNSPEC){ - index = resourcesIndex; - } - return index; - } - + return indexResultItems; } + public String getTaggedInput() { if(templateGenerator==null) {throw new AssertionError("Learner not initialized. Please call init();");} @@ -1255,33 +908,33 @@ return isDatatypeProperty; } - /** - * @param args - * @throws NoTemplateFoundException - * @throws IOException - * @throws FileNotFoundException - * @throws InvalidFileFormatException - */ - public static void main(String[] args) throws Exception { - SparqlEndpoint endpoint = new SparqlEndpoint(new URL("http://greententacle.techfak.uni-bielefeld.de:5171/sparql"), - Collections.<String>singletonList(""), Collections.<String>emptyList()); - Index resourcesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_resources"); - Index classesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_classes"); - Index propertiesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_properties"); + // /** + // * @param args + // * @throws NoTemplateFoundException + // * @throws IOException + // * @throws FileNotFoundException + // * @throws InvalidFileFormatException + // */ + // public static void main(String[] args) throws Exception { + // SparqlEndpoint endpoint = new SparqlEndpoint(new URL("http://greententacle.techfak.uni-bielefeld.de:5171/sparql"), + // Collections.<String>singletonList(""), Collections.<String>emptyList()); + // Index resourcesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_resources"); + // Index classesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_classes"); + // Index propertiesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_properties"); + // + // SPARQLTemplateBasedLearner2 learner = new SPARQLTemplateBasedLearner2(endpoint, resourcesIndex, classesIndex, propertiesIndex); + // learner.init(); + // + // String question = "What is the highest mountain?"; + // + // learner.setQuestion(question); + // learner.learnSPARQLQueries(); + // System.out.println("Learned query:\n" + learner.getBestSPARQLQuery()); + // System.out.println("Lexical answer type is: " + learner.getTemplates().iterator().next().getLexicalAnswerType()); + // System.out.println(learner.getLearnedPosition()); + // + // } - SPARQLTemplateBasedLearner2 learner = new SPARQLTemplateBasedLearner2(endpoint, resourcesIndex, classesIndex, propertiesIndex); - learner.init(); - String question = "What is the highest mountain?"; - learner.setQuestion(question); - learner.learnSPARQLQueries(); - System.out.println("Learned query:\n" + learner.getBestSPARQLQuery()); - System.out.println("Lexical answer type is: " + learner.getTemplates().iterator().next().getLexicalAnswerType()); - System.out.println(learner.getLearnedPosition()); - - } - - - } Modified: branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java 
=================================================================== --- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java 2012-09-19 15:12:20 UTC (rev 3845) +++ branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java 2012-09-19 15:15:56 UTC (rev 3846) @@ -58,7 +58,7 @@ unions = new HashSet<SPARQL_Union>(); } - //copy constructor + /** copy constructor*/ public Query(Query query){ this.qt = query.getQt(); Set<SPARQL_Term> selTerms = new HashSet<SPARQL_Term>(); Modified: branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java =================================================================== --- branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-09-19 15:12:20 UTC (rev 3845) +++ branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-09-19 15:15:56 UTC (rev 3846) @@ -114,7 +114,7 @@ test("QALD 2 Benchmark ideally tagged", file,SparqlEndpoint.getEndpointDBpedia(),dbpediaLiveCache,dbpediaLiveKnowledgebase,null,null); } - @Test public void testOxford() throws Exception + /*@Test*/ public void testOxford() throws Exception { File file = new File(getClass().getClassLoader().getResource("tbsl/evaluation/oxford_working_questions.xml").getFile()); test("Oxford 19 working questions", file,null,null,null,loadOxfordModel(),getOxfordMappingIndex()); @@ -160,6 +160,26 @@ logger.info("learned query: "+testData.id2Query.get(0)); } + /** For debugging one question in particular. + */ + @Test public void testSingleQueryDBpedia() + { +// Logger.getLogger(Templator.class).setLevel(Level.DEBUG); +// Logger.getLogger(Parser.class).setLevel(Level.DEBUG); +// Logger.getLogger(SPARQLTemplateBasedLearner2.class).setLevel(Level.DEBUG); + // String question = "houses for less than 900000 pounds"; + String question = "Give/VB me/PRP all/DT video/JJ games/NNS published/VBN by/IN Mean/NNP Hamster/NNP Software/NNP"; +// String question = "give me all video games published by mean hamster software"; +// String question = "Give me all video games published by Mean Hamster Software"; +// question = new StanfordPartOfSpeechTagger().tag(question); +// System.out.println(question); + +// Model model = loadOxfordModel(); + QueryTestData testData = new QueryTestData(); + new LearnQueryCallable(question, 0, testData, dbpediaLiveKnowledgebase, true).call(); + logger.info("learned query: "+testData.id2Query.get(0)); + } + /*@Test*/ public void generateXMLOxford() throws IOException { boolean ADD_POS_TAGS = true; @@ -935,6 +955,7 @@ learner = new SPARQLTemplateBasedLearner2(knowledgeBase,pretagged?null:POSTaggerHolder.posTagger,wordnet,options); try {learner.init();} catch (ComponentInitException e) {throw new RuntimeException(e);} learner.setUseIdealTagger(pretagged); + learner.setGrammarFiles(new String[]{"tbsl/lexicon/english.lex"}); } public LearnQueryCallable(String question, int id, QueryTestData testData, Model model,MappingBasedIndex index,boolean pretagged) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
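Two details stand out in the HMM-driven rewrite of getWeightedSPARQLQueries in r3846 above. First, path entries are bound to query variables purely by position, which the inline TODO acknowledges: the variable/resource alignment is not yet guaranteed. Second, the local score stays 0 and is passed to every WeightedQuery while the path probability d is never used for ranking; using the path score directly is presumably the intent. A condensed sketch of the flow under that assumption, restricted to calls that appear in this commit:

    // Sketch only; Template, Query, Slot, WeightedQuery and HiddenMarkovModel
    // are the classes used in the diff above.
    private SortedSet<WeightedQuery> rankByHmmPaths(Template template,
            Map<List<String>, Collection<String>> segmentToURIs) {
        SortedSet<WeightedQuery> queries = new TreeSet<WeightedQuery>();

        HiddenMarkovModel hmm = new HiddenMarkovModel();
        hmm.initialization();
        hmm.startMarkovModel(segmentToURIs, true); // one segment per slot, mapped to candidate URIs
        MultiMap<Double, List<String>> paths = hmm.getPaths();

        for (Double pathScore : paths.keySet()) {
            for (List<String> path : paths.get(pathScore)) {
                Query q = new Query(template.getQuery()); // copy constructor documented in this commit
                int i = 0;
                for (String var : q.getVariablesAsStringList()) {
                    // Positional binding; still subject to the TODO about which
                    // variable stands for which resource.
                    q.replaceVarWithURI(var, path.get(i++));
                }
                queries.add(new WeightedQuery(q, pathScore)); // rank by path probability (assumed intent)
            }
        }
        return queries;
    }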
From: <ki...@us...> - 2012-09-19 15:12:30
Revision: 3845 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3845&view=rev Author: kirdie Date: 2012-09-19 15:12:20 +0000 (Wed, 19 Sep 2012) Log Message: ----------- creating branch for hidden markov model testing Added Paths: ----------- branches/hmm/ This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ki...@us...> - 2012-09-17 12:02:27
Revision: 3844 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3844&view=rev Author: kirdie Date: 2012-09-17 12:02:15 +0000 (Mon, 17 Sep 2012) Log Message: ----------- Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java trunk/components-ext/src/main/resources/tbsl/oxford_dataproperty_mappings.txt trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/QueryTestData.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java Added Paths: ----------- trunk/components-ext/src/main/resources/tbsl/evaluation/oxford_working_questions.xml Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-09-12 22:56:17 UTC (rev 3843) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-09-17 12:02:15 UTC (rev 3844) @@ -494,6 +494,7 @@ max = a.getProminence(); } } + if(min==max) {return;} for(Allocation a : allocations){ double prominence = a.getProminence()/(max-min); a.setProminence(prominence); @@ -905,13 +906,14 @@ if(popularity == null){ popularity = Integer.valueOf(0); } + System.out.println(popularity); // if(cnt == 0){ // return 0; // } // return Math.log(cnt); - if(Double.isNaN(popularity)) {throw new AssertionError("prominence NaN for uri "+uri+", slot type "+type);} + if(popularity!=popularity) {throw new AssertionError("prominence NaN for uri "+uri+", slot type "+type);} return popularity; } Added: trunk/components-ext/src/main/resources/tbsl/evaluation/oxford_working_questions.xml =================================================================== (Binary files differ) Property changes on: trunk/components-ext/src/main/resources/tbsl/evaluation/oxford_working_questions.xml ___________________________________________________________________ Added: svn:mime-type + application/xml Modified: trunk/components-ext/src/main/resources/tbsl/oxford_dataproperty_mappings.txt =================================================================== --- trunk/components-ext/src/main/resources/tbsl/oxford_dataproperty_mappings.txt 2012-09-12 22:56:17 UTC (rev 3843) +++ trunk/components-ext/src/main/resources/tbsl/oxford_dataproperty_mappings.txt 2012-09-17 12:02:15 UTC (rev 3844) @@ -1,4 +1,4 @@ http://www.w3.org/2006/vcard/ns#street-address|address, location, postal code http://purl.org/goodrelations/v1#description|description -http://purl.org/goodrelations/v1#hasPrice|has price, price +http://diadem.cs.ox.ac.uk/ontologies/real-estate#hasPrice|has price, price http://diadem.cs.ox.ac.uk/ontologies/real-estate#receptions|receptions, reception room, reception rooms \ No newline at end of file Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java 2012-09-12 22:56:17 UTC (rev 3843) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java 2012-09-17 12:02:15 UTC (rev 3844) @@ -12,6 +12,8 @@ import 
org.dllearner.algorithm.tbsl.learning.NoTemplateFoundException; import org.dllearner.algorithm.tbsl.learning.SPARQLTemplateBasedLearner2; +import org.dllearner.algorithm.tbsl.learning.SPARQLTemplateBasedLearner3Test; +import org.dllearner.algorithm.tbsl.nlp.StanfordPartOfSpeechTagger; import org.dllearner.common.index.MappingBasedIndex; import org.dllearner.common.index.SPARQLIndex; import org.dllearner.common.index.VirtuosoClassesIndex; @@ -46,7 +48,10 @@ OxfordEvaluation.class.getClassLoader().getResource("tbsl/oxford_objectproperty_mappings.txt").getPath() ); - SPARQLTemplateBasedLearner2 learner = new SPARQLTemplateBasedLearner2(endpoint, resourcesIndex, classesIndex, propertiesIndex); +// SPARQLTemplateBasedLearner2 learner = new SPARQLTemplateBasedLearner2(endpoint, resourcesIndex, classesIndex, propertiesIndex); + SPARQLTemplateBasedLearner2 learner = new SPARQLTemplateBasedLearner2 + (SPARQLTemplateBasedLearner3Test.loadOxfordModel(),SPARQLTemplateBasedLearner3Test.getOxfordMappingIndex(), new StanfordPartOfSpeechTagger()); + learner.setMappingIndex(mappingIndex); learner.init(); learner.setGrammarFiles(new String[]{"tbsl/lexicon/english.lex","tbsl/lexicon/english_oxford.lex"}); Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/QueryTestData.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/QueryTestData.java 2012-09-12 22:56:17 UTC (rev 3843) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/QueryTestData.java 2012-09-17 12:02:15 UTC (rev 3844) @@ -35,7 +35,7 @@ import org.xml.sax.SAXException; import com.hp.hpl.jena.rdf.model.Model; -class QueryTestData implements Serializable +public class QueryTestData implements Serializable { public SortedMap<Integer, String> id2Question = new ConcurrentSkipListMap<Integer, String>(); public SortedMap<Integer, String> id2Query = new ConcurrentSkipListMap<Integer, String>(); Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-09-12 22:56:17 UTC (rev 3843) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-09-17 12:02:15 UTC (rev 3844) @@ -81,7 +81,8 @@ import com.hp.hpl.jena.rdf.model.RDFNode; import com.hp.hpl.jena.sparql.engine.http.QueryExceptionHTTP; -/** Tests TSBL against the qald2 benchmark test data with the DBpedia endpoint. +/** If you just want to test the standard queries, activate justTestTheLastWorkingOnesDBpedia() and testOxford(). + * Tests TSBL against the qald2 benchmark test data with the DBpedia endpoint. * The qald2 endpoint is not used because it may not always be available. * To speed up the process at first the test file is read and an updated copy of it is saved that * only contains the questions where the reference query does not return a nonempty list of resources. 
@@ -105,18 +106,66 @@ private static final boolean OXFORD_PRETAGGED = false; /*@Test*/ public void testDBpedia() throws Exception - {test("QALD 2 Benchmark ideally tagged", new File(getClass().getClassLoader().getResource("tbsl/evaluation/qald2-dbpedia-train-tagged(ideal).xml").getFile()), - SparqlEndpoint.getEndpointDBpedia(),dbpediaLiveCache);} + { + File file = generateTestDataIfNecessary( + new File(getClass().getClassLoader().getResource("tbsl/evaluation/qald2-dbpedia-train-tagged(ideal).xml").getFile()), + SparqlEndpoint.getEndpointDBpedia(), + dbpediaLiveCache); + test("QALD 2 Benchmark ideally tagged", file,SparqlEndpoint.getEndpointDBpedia(),dbpediaLiveCache,dbpediaLiveKnowledgebase,null,null); + } - /*@Test*/ public void testOxford() throws Exception + @Test public void testOxford() throws Exception { + File file = new File(getClass().getClassLoader().getResource("tbsl/evaluation/oxford_working_questions.xml").getFile()); + test("Oxford 19 working questions", file,null,null,null,loadOxfordModel(),getOxfordMappingIndex()); } - @Test public void generateXMLOxford() throws IOException +// /*@Test*/ public void testOxford() throws Exception +// { +// Model model = loadOxfordModel(); +// QueryTestData testData = QueryTestData.readQaldXml(new File("log/oxford_working_questions.xml")); +// // answers are not included at least in the first query TODO: check, why +// testData.generateAnswers(null, null, model); +// QueryTestData newTestData = generateTestDataMultiThreaded(testData.id2Question, null, model,getOxfordMappingIndex() , OXFORD_PRETAGGED); +// newTestData.generateAnswers(null, null, model); +// for(int i : testData.id2Question.keySet()) +// { +// logger.info("Comparing answers for question "+testData.id2Question.get(i)); +// String referenceQuery = testData.id2Query.get(i); +// String newQuery = newTestData.id2Query.get(i); +// if(!referenceQuery.equals(newQuery)) +// { +// logger.warn("not equal, reference query: "+referenceQuery+", new query: "+newQuery); +// Collection<String> referenceAnswers = testData.id2Answers.get(i); +// Collection<String> newAnswers = newTestData.id2Answers.get(i); +// if(!referenceAnswers.equals(newAnswers)) fail("not equal, reference answers: "+referenceAnswers+", new answers: "+newAnswers); +// } +// } +// } + + /** For debugging one question in particular. 
+ */ + /*@Test*/ public void testSingleQueryOxford() { - boolean ADD_POS_TAGS = false; - PartOfSpeechTagger posTagger = new StanfordPartOfSpeechTagger(); + Logger.getLogger(Templator.class).setLevel(Level.DEBUG); + Logger.getLogger(Parser.class).setLevel(Level.DEBUG); + Logger.getLogger(SPARQLTemplateBasedLearner2.class).setLevel(Level.DEBUG); + // String question = "houses for less than 900000 pounds"; + String question = "houses/NNS for/IN less/JJR than/IN 900000/CD pounds/NNS"; + //question = new StanfordPartOfSpeechTagger().tag(question); + Model model = loadOxfordModel(); + QueryTestData testData = new QueryTestData(); + new LearnQueryCallable(question, 0, testData, model, getOxfordMappingIndex(), true).call(); + logger.info("learned query: "+testData.id2Query.get(0)); + } + + /*@Test*/ public void generateXMLOxford() throws IOException + { + boolean ADD_POS_TAGS = true; + PartOfSpeechTagger posTagger = null; + if(ADD_POS_TAGS) {posTagger=new StanfordPartOfSpeechTagger();} + Model model = loadOxfordModel(); List<String> questions = new LinkedList<String>(); BufferedReader in = new BufferedReader((new InputStreamReader(getClass().getClassLoader().getResourceAsStream("tbsl/oxford_eval_queries.txt")))); int j=0; @@ -132,18 +181,23 @@ SortedMap<Integer,String> id2Question = new TreeMap<Integer, String>(); Iterator<String> it = questions.iterator(); for(int i=0;i<questions.size();i++) {id2Question.put(i, it.next());} - MappingBasedIndex mappingIndex= new MappingBasedIndex( + MappingBasedIndex mappingIndex= getOxfordMappingIndex(); + QueryTestData testData = generateTestDataMultiThreaded(id2Question, null,model,mappingIndex,ADD_POS_TAGS||OXFORD_PRETAGGED); + testData.generateAnswers(null, null, model); + testData.writeQaldXml(new File("log/test.xml")); + } + + public static MappingBasedIndex getOxfordMappingIndex() + { + return new MappingBasedIndex( SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_class_mappings.txt").getPath(), SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_resource_mappings.txt").getPath(), SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_dataproperty_mappings.txt").getPath(), SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_objectproperty_mappings.txt").getPath() ); - QueryTestData testData = generateTestDataMultiThreaded(id2Question, null,model,mappingIndex,ADD_POS_TAGS||OXFORD_PRETAGGED); - testData.generateAnswers(null, null, model); - testData.writeQaldXml(new File("log/test.xml")); } - private Model loadOxfordModel() + public static Model loadOxfordModel() { // load it into a model because we can and it's faster and doesn't rely on endpoint availability // the files are located in the paper svn under question-answering-iswc-2012/data @@ -189,7 +243,7 @@ // switch(type) {case "ttl":type="TURTLE";break;case "owl":type="RDF/XML";break;default:throw new RuntimeException("filetype "+ending+" not handled.");} // no Java 1.7 :-( try{ // m.read(new FileInputStream(new File("/home/lorenz/arbeit/papers/question-answering-iswc-2012/data/"+s)), null, type);}catch (FileNotFoundException e) {} - m.read(getClass().getClassLoader().getResourceAsStream("oxford/"+s),null, type);} + m.read(SPARQLTemplateBasedLearner3Test.class.getClassLoader().getResourceAsStream("oxford/"+s),null, type);} catch(RuntimeException e) {throw new RuntimeException("Could not read into model: "+s,e);} } // test("Oxford evaluation questions", new 
File(getClass().getClassLoader().getResource("tbsl/evaluation/qald2-dbpedia-train-tagged(ideal).xml").getFile()), @@ -215,9 +269,10 @@ } } - public void test(String title, final File referenceXML,final SparqlEndpoint endpoint,ExtractionDBCache cache) throws ParserConfigurationException, SAXException, IOException, TransformerException, ComponentInitException, NoTemplateFoundException + public void test(String title, final File referenceXML,final SparqlEndpoint endpoint,ExtractionDBCache cache,Knowledgebase kb, Model model, MappingBasedIndex index) + throws ParserConfigurationException, SAXException, IOException, TransformerException, ComponentInitException, NoTemplateFoundException { - generateTestDataIfNecessaryAndEvaluateAndWrite(title,referenceXML,endpoint,cache); + evaluateAndWrite(title,referenceXML,endpoint,cache,kb,model,index); generateHTML(title); // if(evaluation.numberOfCorrectAnswers<3) {fail("only " + evaluation.numberOfCorrectAnswers+" correct answers.");} @@ -245,26 +300,31 @@ learnedTestData.write();*/ } - private void generateTestDataIfNecessaryAndEvaluateAndWrite(String title, final File referenceXML,final SparqlEndpoint endpoint,ExtractionDBCache cache) throws ParserConfigurationException, SAXException, IOException, TransformerException, ComponentInitException + private File generateTestDataIfNecessary(final File referenceXML,final SparqlEndpoint endpoint,ExtractionDBCache cache) throws ParserConfigurationException, SAXException, IOException, TransformerException { String dir = "cache/"+getClass().getSimpleName()+"/"; - new File(dir).mkdirs(); File updatedReferenceXML=new File(dir+"updated_"+referenceXML.getName()); + if(!updatedReferenceXML.exists()) { - logger.info("Generating updated reference for "+title); + logger.info("Generating updated reference."); generateUpdatedXML(referenceXML,updatedReferenceXML,endpoint,cache,null); } + return updatedReferenceXML; + } + private void evaluateAndWrite(String title,final File updatedReferenceXML, final SparqlEndpoint endpoint,ExtractionDBCache cache, + Knowledgebase kb, Model model, MappingBasedIndex index) + { QueryTestData referenceTestData = QueryTestData.readQaldXml(updatedReferenceXML); logger.info(title+" subset loaded with "+referenceTestData.id2Question.size()+" questions."); long startLearning = System.currentTimeMillis(); - QueryTestData learnedTestData = generateTestDataMultiThreaded(referenceTestData.id2Question, dbpediaLiveKnowledgebase,null,null,DBPEDIA_PRETAGGED); + QueryTestData learnedTestData = generateTestDataMultiThreaded(referenceTestData.id2Question, kb,model,index,DBPEDIA_PRETAGGED); long endLearning = System.currentTimeMillis(); logger.info("finished learning after "+(endLearning-startLearning)/1000.0+"s"); - learnedTestData.generateAnswers(endpoint,cache,null); + learnedTestData.generateAnswers(endpoint,cache,model); long endGeneratingAnswers = System.currentTimeMillis(); logger.info("finished generating answers in "+(endGeneratingAnswers-endLearning)/1000.0+"s"); Evaluation evaluation = evaluate(referenceTestData, learnedTestData); @@ -756,7 +816,7 @@ public static Set<String> getUris(final SparqlEndpoint endpoint, final String query, ExtractionDBCache cache, Model model) { if(query==null) {throw new AssertionError("query is null");} -// if(endpoint==null) {throw new AssertionError("endpoint is null");} + // if(endpoint==null) {throw new AssertionError("endpoint is null");} if(!query.contains("SELECT")&&!query.contains("select")) {return Collections.<String>emptySet();} // abort when not a select query 
Set<String> uris = new HashSet<String>(); // QueryEngineHTTP qe = new QueryEngineHTTP(DBPEDIA_LIVE_ENDPOINT_URL_STRING, query); @@ -851,6 +911,8 @@ // // return kb; // } + /** @author konrad + * Learns a query for a question and puts it into the given testData object. * */ private static class LearnQueryCallable implements Callable<LearnStatus> { private final String question; @@ -897,7 +959,7 @@ @Override public LearnStatus call() { - + logger.trace("learning question: "+question); try {
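For illustration: the refactoring above separates reference generation (cached on disk) from evaluation, so repeated test runs skip the expensive regeneration step. A minimal sketch of the caching half, with placeholder file names and a copy standing in for the real SPARQL-backed regeneration:

    import java.io.File;
    import java.nio.file.Files;

    // Sketch of the split introduced above: generate the updated reference XML
    // only when no cached copy exists, then hand it to the evaluation step.
    // File names are placeholders, not the project's actual paths.
    public class CachedReferenceExample {

        static File generateTestDataIfNecessary(File referenceXML) throws Exception {
            File dir = new File("cache/CachedReferenceExample/");
            dir.mkdirs();
            File updated = new File(dir, "updated_" + referenceXML.getName());
            if (!updated.exists()) {
                // stands in for the expensive regeneration against the endpoint
                Files.copy(referenceXML.toPath(), updated.toPath());
            }
            return updated;
        }

        public static void main(String[] args) throws Exception {
            File updated = generateTestDataIfNecessary(new File("qald2-train.xml"));
            System.out.println("evaluate against " + updated); // evaluateAndWrite step
        }
    }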
From: <lor...@us...> - 2012-09-12 22:56:24
Revision: 3843 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3843&view=rev Author: lorenz_b Date: 2012-09-12 22:56:17 +0000 (Wed, 12 Sep 2012) Log Message: ----------- Updated Debian package creation script. Modified Paths: -------------- trunk/interfaces/src/deb-package/build_debpkg.sh Added Paths: ----------- trunk/interfaces/src/deb-package/debian/ trunk/interfaces/src/deb-package/debian/README trunk/interfaces/src/deb-package/debian/README.Debian trunk/interfaces/src/deb-package/debian/README.source trunk/interfaces/src/deb-package/debian/changelog trunk/interfaces/src/deb-package/debian/compat trunk/interfaces/src/deb-package/debian/control trunk/interfaces/src/deb-package/debian/copyright trunk/interfaces/src/deb-package/debian/dl-learner-interfaces.install trunk/interfaces/src/deb-package/debian/dl-learner-interfaces.substvars trunk/interfaces/src/deb-package/debian/docs trunk/interfaces/src/deb-package/debian/files trunk/interfaces/src/deb-package/debian/postinst trunk/interfaces/src/deb-package/debian/rules trunk/interfaces/src/deb-package/debian/source/ trunk/interfaces/src/deb-package/debian/source/format Modified: trunk/interfaces/src/deb-package/build_debpkg.sh =================================================================== --- trunk/interfaces/src/deb-package/build_debpkg.sh 2012-09-03 14:28:33 UTC (rev 3842) +++ trunk/interfaces/src/deb-package/build_debpkg.sh 2012-09-12 22:56:17 UTC (rev 3843) @@ -1,31 +1,30 @@ #!/bin/sh -MODULE_NAME=interfaces-0.5-SNAPSHOT + +# $1 the version passed by the maven call + +PACKAGE_NAME=dllearner-interfaces +VERSION=$1 + echo "Building Debian package for ${MODULE_NAME}" echo -rm -rf ../../target/dl-learner-interfaces-0.5 -mkdir -p ../../target/dl-learner-interfaces-0.5/usr/share/dllearner -mkdir -p ../../target/dl-learner-interfaces-0.5/usr/share/pixmaps -mkdir -p ../../target/dl-learner-interfaces-0.5/usr/share/applications + +rm -rf ../../target/deb-pkg +mkdir -p ../../target/deb-pkg/bin + # Extract the tarball to the package workspace #tar xfz data.tar.gz --directory ../../target/deb-pkg + # copy war file to package workspace -cp ../../target/interfaces-jar-with-dependencies.jar ../../target/dl-learner-interfaces-0.5/usr/share/dllearner -cp dllearner-gui dllearner-gui.desktop -cp dllearner-cli dllearner-cli.desktop -mv -v dllearner-gui.desktop ../../target/dl-learner-interfaces-0.5/usr/share/applications -mv -v dllearner-cli.desktop ../../target/dl-learner-interfaces-0.5/usr/share/applications -cp ../../target/appassembler/bin/StartCLI ../../target/appassembler/bin/dllearner-CLI.sh -cp ../../target/appassembler/bin/StartGUI ../../target/appassembler/bin/dllearner-GUI.sh -mv ../../target/appassembler/bin/dllearner-CLI.sh ../../target/dl-learner-interfaces-0.5/usr/share/dllearner -mv ../../target/appassembler/bin/dllearner-GUI.sh ../../target/dl-learner-interfaces-0.5/usr/share/dllearner -cp ../../../images/logos/dllearner_small.png ../../target/dl-learner-interfaces-0.5/usr/share/pixmaps +# remove the version in the name +cp ../../target/dl-learner-dist/bin/cli ../../target/deb-pkg/bin/ +cp ../../target/dl-learner-dist/bin/enrichment ../../target/deb-pkg/bin/ +cp -r ../../target/dl-learner-dist/lib/ ../../target/deb-pkg/ +cp -r ../../target/original-interfaces.jar ../../target/deb-pkg/lib/interfaces.jar + # Add the Debian control files -cd dl-learner-interfaces-0.5/debian -dch -n -cd .. -cp -r debian ../../../target/dl-learner-interfaces-0.5 -cd ../../../target/dl-learner-interfaces-0.5/debian -# Build the package and sign it. 
-cd ../../../target/dl-learner-interfaces-0.5 -debuild --check-dirname-level 0 +cp -r debian ../../target/deb-pkg +# Build the package +cd ../../target/deb-pkg +debuild --check-dirname-level 0 -b + Added: trunk/interfaces/src/deb-package/debian/README =================================================================== --- trunk/interfaces/src/deb-package/debian/README (rev 0) +++ trunk/interfaces/src/deb-package/debian/README 2012-09-12 22:56:17 UTC (rev 3843) @@ -0,0 +1,6 @@ +The Debian Package dl-learner-interfaces +---------------------------- + +Comments regarding the Package + + -- Heero Yuy <chr...@ya...> Thu, 07 Jul 2011 20:16:12 +0200 Added: trunk/interfaces/src/deb-package/debian/README.Debian =================================================================== --- trunk/interfaces/src/deb-package/debian/README.Debian (rev 0) +++ trunk/interfaces/src/deb-package/debian/README.Debian 2012-09-12 22:56:17 UTC (rev 3843) @@ -0,0 +1,6 @@ +dl-learner-interfaces for Debian +-------------------------------- + +<possible notes regarding this package - if none, delete this file> + + -- Heero Yuy <chr...@ya...> Thu, 07 Jul 2011 20:16:12 +0200 Added: trunk/interfaces/src/deb-package/debian/README.source =================================================================== --- trunk/interfaces/src/deb-package/debian/README.source (rev 0) +++ trunk/interfaces/src/deb-package/debian/README.source 2012-09-12 22:56:17 UTC (rev 3843) @@ -0,0 +1,9 @@ +dl-learner-interfaces for Debian +-------------------------------- + +<this file describes information about the source package, see Debian policy +manual section 4.14. You WILL either need to modify or delete this file> + + + + Added: trunk/interfaces/src/deb-package/debian/changelog =================================================================== --- trunk/interfaces/src/deb-package/debian/changelog (rev 0) +++ trunk/interfaces/src/deb-package/debian/changelog 2012-09-12 22:56:17 UTC (rev 3843) @@ -0,0 +1,17 @@ +dl-learner-interfaces (0.6-1) lod2; urgency=low + + * Added enrichment algorithms for learning schema axioms on SPARQL knowledge bases. + + -- Lorenz Buehmann <bue...@in...> Wed, 12 Sep 2012 17:52:18 +0200 + +dl-learner-interfaces (0.5-2) lod2; urgency=low + + * Added start scripts for the interfaces + + -- Christian Kötteritzsch (Der Student) <chr...@ya...> Thu, 14 Jul 2011 12:36:41 +0200 + +dl-learner-interfaces (0.5-1) lod2; urgency=low + + * Initial Release. 
+ + -- Christian Kötteritzsch (Der Student) <chr...@ya...> Thu, 07 Jul 2011 20:16:12 +0200 Added: trunk/interfaces/src/deb-package/debian/compat =================================================================== --- trunk/interfaces/src/deb-package/debian/compat (rev 0) +++ trunk/interfaces/src/deb-package/debian/compat 2012-09-12 22:56:17 UTC (rev 3843) @@ -0,0 +1 @@ +7 Added: trunk/interfaces/src/deb-package/debian/control =================================================================== --- trunk/interfaces/src/deb-package/debian/control (rev 0) +++ trunk/interfaces/src/deb-package/debian/control 2012-09-12 22:56:17 UTC (rev 3843) @@ -0,0 +1,15 @@ +Source: dl-learner-interfaces +Section: misc +Priority: extra +Maintainer: Jens Lehmann <le...@in...> +Build-Depends: debhelper (>= 7.0.50~) +Standards-Version: 3.9.1 +Homepage: http://dl-learner.org +#Vcs-Git: git://git.debian.org/collab-maint/dl-learner-interfaces.git +#Vcs-Browser: http://git.debian.org/?p=collab-maint/dl-learner-interfaces.git;a=summary + +Package: dl-learner-interfaces +Architecture: all +Depends: openjdk-6-jre | sun-java6-jre, dl-learner-components-core +Description: This package provides interfaces to use DL-Learner: + a commandline interface, a graphical user interface and a web service. You can start them using the commands dl-learner-cli, dl-learner-gui and dl-learner-ws. Added: trunk/interfaces/src/deb-package/debian/copyright =================================================================== --- trunk/interfaces/src/deb-package/debian/copyright (rev 0) +++ trunk/interfaces/src/deb-package/debian/copyright 2012-09-12 22:56:17 UTC (rev 3843) @@ -0,0 +1,31 @@ +Format: http://dep.debian.net/deps/dep5 +Upstream-Name: dl-learner-interfaces +Source: <url://example.com> + +Files: * +Copyright: <years> <put author's name and email here> + <years> <likewise for another author> +License: Apache-2.0 + +Files: debian/* +Copyright: 2011 Heero Yuy <chr...@ya...> +License: Apache-2.0 + +License: Apache-2.0 + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + . + http://www.apache.org/licenses/LICENSE-2.0 + . + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + . + On Debian systems, the complete text of the Apache version 2.0 license + can be found in "/usr/share/common-licenses/Apache-2.0". + +# Please also look if there are files or directories which have a +# different copyright/license attached and list them here. 
Added: trunk/interfaces/src/deb-package/debian/dl-learner-interfaces.install =================================================================== --- trunk/interfaces/src/deb-package/debian/dl-learner-interfaces.install (rev 0) +++ trunk/interfaces/src/deb-package/debian/dl-learner-interfaces.install 2012-09-12 22:56:17 UTC (rev 3843) @@ -0,0 +1,2 @@ +lib /usr/share/dllearner +bin /usr/share/dllearner \ No newline at end of file Added: trunk/interfaces/src/deb-package/debian/dl-learner-interfaces.substvars =================================================================== --- trunk/interfaces/src/deb-package/debian/dl-learner-interfaces.substvars (rev 0) +++ trunk/interfaces/src/deb-package/debian/dl-learner-interfaces.substvars 2012-09-12 22:56:17 UTC (rev 3843) @@ -0,0 +1 @@ +misc:Depends= Added: trunk/interfaces/src/deb-package/debian/docs =================================================================== Added: trunk/interfaces/src/deb-package/debian/files =================================================================== --- trunk/interfaces/src/deb-package/debian/files (rev 0) +++ trunk/interfaces/src/deb-package/debian/files 2012-09-12 22:56:17 UTC (rev 3843) @@ -0,0 +1,2 @@ +dl-learner-interfaces_0.5-2.1_all.deb misc extra +dl-learner-interfaces_0.5-2.tar.gz Added: trunk/interfaces/src/deb-package/debian/postinst =================================================================== --- trunk/interfaces/src/deb-package/debian/postinst (rev 0) +++ trunk/interfaces/src/deb-package/debian/postinst 2012-09-12 22:56:17 UTC (rev 3843) @@ -0,0 +1,5 @@ +#!/bin/sh + +chmod 666 /usr/share/dllearner + +exit 0 Added: trunk/interfaces/src/deb-package/debian/rules =================================================================== --- trunk/interfaces/src/deb-package/debian/rules (rev 0) +++ trunk/interfaces/src/deb-package/debian/rules 2012-09-12 22:56:17 UTC (rev 3843) @@ -0,0 +1,13 @@ +#!/usr/bin/make -f +# -*- makefile -*- +# Sample debian/rules that uses debhelper. +# This file was originally written by Joey Hess and Craig Small. +# As a special exception, when this file is copied by dh-make into a +# dh-make output file, you may use that output file without restriction. +# This special exception was added by Craig Small in version 0.37 of dh-make. +# Uncomment this to turn on verbose mode. +#export DH_VERBOSE=1 + +%: + dh $@ Added: trunk/interfaces/src/deb-package/debian/source/format =================================================================== --- trunk/interfaces/src/deb-package/debian/source/format (rev 0) +++ trunk/interfaces/src/deb-package/debian/source/format 2012-09-12 22:56:17 UTC (rev 3843) @@ -0,0 +1 @@ +3.0 (native)
From: <lor...@us...> - 2012-09-03 14:28:44
Revision: 3842 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3842&view=rev Author: lorenz_b Date: 2012-09-03 14:28:33 +0000 (Mon, 03 Sep 2012) Log Message: ----------- Fixed bug. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/properties/DisjointObjectPropertyAxiomLearner.java trunk/components-core/src/main/java/org/dllearner/algorithms/properties/EquivalentObjectPropertyAxiomLearner.java trunk/components-core/src/main/java/org/dllearner/core/owl/EquivalentObjectPropertiesAxiom.java trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/properties/DisjointObjectPropertyAxiomLearner.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/properties/DisjointObjectPropertyAxiomLearner.java 2012-08-30 15:07:38 UTC (rev 3841) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/properties/DisjointObjectPropertyAxiomLearner.java 2012-09-03 14:28:33 UTC (rev 3842) @@ -194,24 +194,24 @@ for(ObjectProperty p : completeDisjointProperties){ if(usePropertyPopularity){ int overlap = 0; - int pop; + int otherPopularity; if(ks.isRemote()){ - pop = reasoner.getPopularity(p); + otherPopularity = reasoner.getPopularity(p); } else { Model model = ((LocalModelBasedSparqlEndpointKS)ks).getModel(); - pop = model.listStatements(null, model.getProperty(p.getName()), (RDFNode)null).toSet().size(); + otherPopularity = model.listStatements(null, model.getProperty(p.getName()), (RDFNode)null).toSet().size(); } - //we skip classes with no instances - if(pop == 0) continue; + //we skip properties with no instances + if(otherPopularity == 0) continue; //we compute the estimated precision - double precision = accuracy(pop, overlap); + double precision = accuracy(otherPopularity, overlap); //we compute the estimated recall double recall = accuracy(popularity, overlap); //compute the overall score double score = 1 - fMEasure(precision, recall); - evalAxiom = new EvaluatedAxiom(new DisjointObjectPropertyAxiom(propertyToDescribe, p), new AxiomScore(score)); + evalAxiom = new EvaluatedAxiom(new DisjointObjectPropertyAxiom(propertyToDescribe, p), new AxiomScore(score, score, popularity, popularity, 0)); } else { evalAxiom = new EvaluatedAxiom(new DisjointObjectPropertyAxiom(propertyToDescribe, p), new AxiomScore(1)); } @@ -223,24 +223,24 @@ for(Entry<ObjectProperty, Integer> entry : sortByValues(property2Count)){ p = entry.getKey(); int overlap = entry.getValue(); - int pop; + int otherPopularity; if(ks.isRemote()){ - pop = reasoner.getPopularity(p); + otherPopularity = reasoner.getPopularity(p); } else { Model model = ((LocalModelBasedSparqlEndpointKS)ks).getModel(); - pop = model.listStatements(null, model.getProperty(p.getName()), (RDFNode)null).toSet().size(); + otherPopularity = model.listStatements(null, model.getProperty(p.getName()), (RDFNode)null).toSet().size(); } - //we skip classes with no instances - if(pop == 0) continue; + //we skip properties with no instances + if(otherPopularity == 0) continue; //we compute the estimated precision - double precision = accuracy(pop, overlap); + double precision = accuracy(otherPopularity, overlap); //we compute the estimated recall double recall = accuracy(popularity, overlap); //compute the overall score double score = 1 - fMEasure(precision, recall); - evalAxiom = new EvaluatedAxiom(new DisjointObjectPropertyAxiom(propertyToDescribe, p), 
new AxiomScore(score)); + evalAxiom = new EvaluatedAxiom(new DisjointObjectPropertyAxiom(propertyToDescribe, p), new AxiomScore(score, score, popularity, popularity - overlap, overlap)); } property2Count.put(propertyToDescribe, all); @@ -251,7 +251,7 @@ SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpedia(); // endpoint = new SparqlEndpoint(new URL("http://dbpedia.aksw.org:8902/sparql"), Collections.singletonList("http://dbpedia.org"), Collections.<String>emptyList())); DisjointObjectPropertyAxiomLearner l = new DisjointObjectPropertyAxiomLearner(new SparqlEndpointKS(endpoint));//.getEndpointDBpediaLiveAKSW())); - l.setPropertyToDescribe(new ObjectProperty("http://dbpedia.org/ontology/aircraftTransport")); + l.setPropertyToDescribe(new ObjectProperty("http://dbpedia.org/ontology/league")); l.setMaxExecutionTimeInSeconds(10); l.init(); l.getReasoner().precomputeObjectPropertyPopularity(); Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/properties/EquivalentObjectPropertyAxiomLearner.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/properties/EquivalentObjectPropertyAxiomLearner.java 2012-08-30 15:07:38 UTC (rev 3841) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/properties/EquivalentObjectPropertyAxiomLearner.java 2012-09-03 14:28:33 UTC (rev 3842) @@ -171,9 +171,9 @@ property2Count.remove(propertyToDescribe); EvaluatedAxiom evalAxiom; - Set<ObjectProperty> properties; + List<ObjectProperty> properties; for(Entry<ObjectProperty, Integer> entry : sortByValues(property2Count)){ - properties = new HashSet<ObjectProperty>(); + properties = new ArrayList<ObjectProperty>(); properties.add(propertyToDescribe); properties.add(entry.getKey()); int popularity = reasoner.getPropertyCount(entry.getKey()); Modified: trunk/components-core/src/main/java/org/dllearner/core/owl/EquivalentObjectPropertiesAxiom.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/core/owl/EquivalentObjectPropertiesAxiom.java 2012-08-30 15:07:38 UTC (rev 3841) +++ trunk/components-core/src/main/java/org/dllearner/core/owl/EquivalentObjectPropertiesAxiom.java 2012-09-03 14:28:33 UTC (rev 3842) @@ -19,21 +19,21 @@ package org.dllearner.core.owl; +import java.util.Collection; import java.util.Iterator; import java.util.Map; -import java.util.Set; public class EquivalentObjectPropertiesAxiom extends PropertyAxiom { private static final long serialVersionUID = -1085651734702155330L; - private Set<ObjectProperty> equivalentProperties; + private Collection<ObjectProperty> equivalentProperties; - public EquivalentObjectPropertiesAxiom(Set<ObjectProperty> equivalentProperties) { + public EquivalentObjectPropertiesAxiom(Collection<ObjectProperty> equivalentProperties) { this.equivalentProperties = equivalentProperties; } - public Set<ObjectProperty> getEquivalentProperties() { + public Collection<ObjectProperty> getEquivalentProperties() { return equivalentProperties; } Modified: trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java 2012-08-30 15:07:38 UTC (rev 3841) +++ trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java 2012-09-03 14:28:33 UTC (rev 3842) @@ -109,6 +109,7 @@ cache = new ExtractionDBCache("cache"); } 
classPopularityMap = new HashMap<NamedClass, Integer>(); + objectPropertyPopularityMap = new HashMap<ObjectProperty, Integer>(); } public SPARQLReasoner(SparqlEndpointKS ks, ExtractionDBCache cache) { @@ -116,12 +117,14 @@ this.cache = cache; classPopularityMap = new HashMap<NamedClass, Integer>(); + objectPropertyPopularityMap = new HashMap<ObjectProperty, Integer>(); } public SPARQLReasoner(OntModel model) { this.model = model; classPopularityMap = new HashMap<NamedClass, Integer>(); + objectPropertyPopularityMap = new HashMap<ObjectProperty, Integer>(); } public void precomputePopularity(){
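For illustration: the disjointness score assembled in r3842 above combines the two property popularities and their overlap into 1 minus an F-measure. A standalone sketch, assuming accuracy() is the plain success ratio and fMEasure() the harmonic mean (the actual DL-Learner code may use smoothed estimates):

    // Standalone sketch of the score computed in DisjointObjectPropertyAxiomLearner.
    public class DisjointnessScoreExample {

        static double accuracy(int total, int success) {
            return total == 0 ? 0.0 : (double) success / total;
        }

        static double fMeasure(double precision, double recall) {
            return (precision + recall == 0) ? 0.0
                    : 2 * precision * recall / (precision + recall);
        }

        // popularity:      triples using the property being described
        // otherPopularity: triples using the candidate disjoint property
        // overlap:         usages shared by both properties
        static double disjointnessScore(int popularity, int otherPopularity, int overlap) {
            double precision = accuracy(otherPopularity, overlap);
            double recall = accuracy(popularity, overlap);
            return 1 - fMeasure(precision, recall); // no overlap yields score 1
        }

        public static void main(String[] args) {
            System.out.println(disjointnessScore(1000, 800, 0));   // 1.0: fully disjoint
            System.out.println(disjointnessScore(1000, 800, 400)); // ~0.56: heavy overlap
        }
    }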
From: <chr...@us...> - 2012-08-30 15:07:47
Revision: 3841 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3841&view=rev Author: christinaunger Date: 2012-08-30 15:07:38 +0000 (Thu, 30 Aug 2012) Log Message: ----------- (just for the sake of completeness...) Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2012-08-30 15:05:08 UTC (rev 3840) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2012-08-30 15:07:38 UTC (rev 3841) @@ -381,8 +381,9 @@ try { Template temp = d2s.convert(drs,slots); - if (temp == null) {continue;} - temp = temp.checkandrefine(); + if (temp == null) { continue; } + temp = temp.checkandrefine(); + if (temp == null) { continue; } if (USE_WORDNET) { // find WordNet synonyms
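The fix is two lines, but the pattern is general: both the conversion and the refinement step may veto a template by returning null, so each result must be checked before the next call. A generic, self-contained illustration (Optional used instead of null for brevity; hypothetical stages, not project code):

    import java.util.List;
    import java.util.Optional;

    // Generic sketch of the double guard above: every pipeline stage may
    // reject its input, so each result is checked before the next call.
    public class GuardedPipelineExample {
        static Optional<String> convert(String drs)      { return drs.isEmpty() ? Optional.empty() : Optional.of(drs.trim()); }
        static Optional<String> checkAndRefine(String t) { return t.contains("?") ? Optional.of(t) : Optional.empty(); }

        public static void main(String[] args) {
            for (String drs : List.of("SELECT ?x", "", "no variable")) {
                Optional<String> template = convert(drs).flatMap(GuardedPipelineExample::checkAndRefine);
                template.ifPresent(t -> System.out.println("kept: " + t)); // only "SELECT ?x" survives
            }
        }
    }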
From: <chr...@us...> - 2012-08-30 15:05:18
Revision: 3840 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3840&view=rev Author: christinaunger Date: 2012-08-30 15:05:08 +0000 (Thu, 30 Aug 2012) Log Message: ----------- throwing out some more templates (based on DRSs that contain empty(...,int) or regextoken(...,int)) Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2BasicSPARQL_Converter.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2BasicSPARQL_Converter.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2BasicSPARQL_Converter.java 2012-08-30 13:47:49 UTC (rev 3839) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2BasicSPARQL_Converter.java 2012-08-30 15:05:08 UTC (rev 3840) @@ -464,7 +464,7 @@ } private boolean restructureEmpty(DRS drs) { - + Set<Simple_DRS_Condition> emptyConditions = new HashSet<Simple_DRS_Condition>(); for (Simple_DRS_Condition c : drs.getAllSimpleConditions()) { if(c.getPredicate().equals("empty")) { @@ -528,5 +528,5 @@ } } return globalsuccess; - } + } } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java 2012-08-30 13:47:49 UTC (rev 3839) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java 2012-08-30 15:05:08 UTC (rev 3840) @@ -84,10 +84,9 @@ // System.out.println("\n--- DRS (before): " + drs); // DEBUG redundantEqualRenaming(drs); - if (!restructureEmpty(drs)) { - return null; + if (!restructureEmpty(drs) || !replaceRegextoken(drs)) { + return null; } - replaceRegextoken(drs); // System.out.println("--- DRS (after) : " + drs); // DEBUG for (DiscourseReferent referent : drs.collectDRs()) { @@ -467,11 +466,16 @@ } } - private void replaceRegextoken(DRS drs) { - + private boolean replaceRegextoken(DRS drs) { + Set<Simple_DRS_Condition> cs = new HashSet<Simple_DRS_Condition>(); for (Simple_DRS_Condition c : drs.getAllSimpleConditions()) { - if(c.getPredicate().equals("regextoken")) cs.add(c); + if(c.getPredicate().equals("regextoken")) { + for (DiscourseReferent arg : c.getArguments()) { + if (arg.getValue().matches("[1-9]+")) return false; + else cs.add(c); + } + } } String var; @@ -554,15 +558,19 @@ break; } } + return true; } private boolean restructureEmpty(DRS drs) { - + Set<Simple_DRS_Condition> emptyConditions = new HashSet<Simple_DRS_Condition>(); for (Simple_DRS_Condition c : drs.getAllSimpleConditions()) { if(c.getPredicate().equals("empty") || c.getPredicate().equals("empty_data")) { - emptyConditions.add(c); + for (DiscourseReferent arg : c.getArguments()) { + if (arg.getValue().matches("[1-9]+")) drs.removeCondition(c); + else emptyConditions.add(c); + } } } if (emptyConditions.isEmpty()) { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java =================================================================== --- 
trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2012-08-30 13:47:49 UTC (rev 3839) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2012-08-30 15:05:08 UTC (rev 3840) @@ -230,10 +230,9 @@ try { Template temp = d2s.convert(drs,slots); - temp = temp.checkandrefine(); - if (temp == null) { - continue; - } + if (temp == null) { continue; } + temp = temp.checkandrefine(); + if (temp == null) { continue; } if (USE_WORDNET) { // find WordNet synonyms List<String> newwords; @@ -382,9 +381,8 @@ try { Template temp = d2s.convert(drs,slots); - temp = temp.checkandrefine(); if (temp == null) {continue;} - + temp = temp.checkandrefine(); if (USE_WORDNET) { // find WordNet synonyms Modified: trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex =================================================================== --- trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex 2012-08-30 13:47:49 UTC (rev 3839) +++ trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex 2012-08-30 15:05:08 UTC (rev 3840) @@ -108,7 +108,7 @@ had || (S DP[subject] (VP V:'had' DP[object])) || <x, l1, t, [ l1:[ | ], l2:[ | empty(x,y) ] ], [ (l3,x,subject,<<e,t>,t>), (l4,y,object,<<e,t>,t>) ], [ l3<l1, l4<l1, l2<scope(l3), l2<scope(l4) ],[]> // with || (NP NP* (PP P:'with' DP[dp])) || <x,l1,<e,t>,[ l1:[| empty(x,y) ] ],[(l2,y,dp,<<e,t>,t>)],[l2=l1],[]> - + people || (NP N:'people') || <x,l1,<e,t>,[ l1:[|] ],[],[],[]>
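For illustration: the rejection rule introduced in r3840 boils down to scanning DRS conditions named empty or regextoken for bare integer arguments and discarding the template when one is found. A reduced sketch with simplified, hypothetical condition objects (note that the diff's pattern "[1-9]+" only matches digit runs without zeros):

    import java.util.List;

    // Reduced sketch of the filter above: a template is discarded when a DRS
    // condition like empty(x,4) or regextoken(z,2) carries an integer argument.
    public class DrsFilterExample {

        record Condition(String predicate, List<String> args) {}

        static boolean keepTemplate(List<Condition> conditions) {
            for (Condition c : conditions) {
                if (c.predicate().equals("empty") || c.predicate().equals("regextoken")) {
                    for (String arg : c.args()) {
                        if (arg.matches("[1-9]+")) return false; // integer argument: drop template
                    }
                }
            }
            return true;
        }

        public static void main(String[] args) {
            System.out.println(keepTemplate(List.of(new Condition("regextoken", List.of("z", "y"))))); // true
            System.out.println(keepTemplate(List.of(new Condition("regextoken", List.of("z", "2"))))); // false
        }
    }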
From: <lor...@us...> - 2012-08-30 13:48:00
Revision: 3839 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3839&view=rev Author: lorenz_b Date: 2012-08-30 13:47:49 +0000 (Thu, 30 Aug 2012) Log Message: ----------- Fixed bug. Storing metrics to explain accuracy score. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/core/AbstractAxiomLearningAlgorithm.java trunk/components-core/src/main/java/org/dllearner/learningproblems/AxiomScore.java trunk/components-core/src/main/java/org/dllearner/utilities/owl/DLLearnerDescriptionConvertVisitor.java Modified: trunk/components-core/src/main/java/org/dllearner/core/AbstractAxiomLearningAlgorithm.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/core/AbstractAxiomLearningAlgorithm.java 2012-08-27 12:13:42 UTC (rev 3838) +++ trunk/components-core/src/main/java/org/dllearner/core/AbstractAxiomLearningAlgorithm.java 2012-08-30 13:47:49 UTC (rev 3839) @@ -414,7 +414,7 @@ double confidence = confidenceInterval[1] - confidenceInterval[0]; - return new AxiomScore(accuracy, confidence); + return new AxiomScore(accuracy, confidence, total, success, total-success); } protected double accuracy(int total, int success){ Modified: trunk/components-core/src/main/java/org/dllearner/learningproblems/AxiomScore.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/learningproblems/AxiomScore.java 2012-08-27 12:13:42 UTC (rev 3838) +++ trunk/components-core/src/main/java/org/dllearner/learningproblems/AxiomScore.java 2012-08-30 13:47:49 UTC (rev 3839) @@ -27,13 +27,26 @@ private double accuracy; private double confidence; + private int totalNrOfExamples; + private int nrOfpositiveExamples; + private int nrOfnegativeExamples; + public AxiomScore(double accuracy) { this.accuracy = accuracy; } public AxiomScore(double accuracy, double confidence) { this.accuracy = accuracy; + this.confidence = confidence; } + + public AxiomScore(double accuracy, double confidence, int totalNrOfExamples, int nrOfpositiveExamples, int nrOfnegativeExamples) { + this.accuracy = accuracy; + this.confidence = confidence; + this.totalNrOfExamples = totalNrOfExamples; + this.nrOfpositiveExamples = nrOfpositiveExamples; + this.nrOfnegativeExamples = nrOfnegativeExamples; + } @Override public double getAccuracy() { @@ -43,5 +56,17 @@ public double getConfidence(){ return confidence; } + + public int getTotalNrOfExamples() { + return totalNrOfExamples; + } + + public int getNrOfpositiveExamples() { + return nrOfpositiveExamples; + } + + public int getNrOfnegativeExamples() { + return nrOfnegativeExamples; + } } Modified: trunk/components-core/src/main/java/org/dllearner/utilities/owl/DLLearnerDescriptionConvertVisitor.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/utilities/owl/DLLearnerDescriptionConvertVisitor.java 2012-08-27 12:13:42 UTC (rev 3838) +++ trunk/components-core/src/main/java/org/dllearner/utilities/owl/DLLearnerDescriptionConvertVisitor.java 2012-08-30 13:47:49 UTC (rev 3839) @@ -35,6 +35,7 @@ import org.dllearner.core.owl.NamedClass; import org.dllearner.core.owl.Negation; import org.dllearner.core.owl.Nothing; +import org.dllearner.core.owl.ObjectAllRestriction; import org.dllearner.core.owl.ObjectExactCardinalityRestriction; import org.dllearner.core.owl.ObjectMaxCardinalityRestriction; import org.dllearner.core.owl.ObjectMinCardinalityRestriction; @@ -134,7 
+135,7 @@ getIRI().toString()); description.getFiller().accept(this); Description d = stack.pop(); - stack.push(new ObjectSomeRestriction(role, d)); + stack.push(new ObjectAllRestriction(role, d)); } @Override
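For illustration, a usage sketch of the AxiomScore extended in r3839: alongside accuracy and confidence it now carries the raw example counts, so an accuracy of 0.9 can be explained as 90 positive out of 100 examples. This assumes org.dllearner.learningproblems.AxiomScore (constructor and getters as in the diff above) is on the classpath:

    import org.dllearner.learningproblems.AxiomScore;

    // Usage sketch, not repository code: the five-argument constructor and the
    // getters are the ones added in the diff above.
    public class AxiomScoreExample {
        public static void main(String[] args) {
            int total = 100, success = 90;
            double accuracy = (double) success / total;  // 0.9
            double confidence = 0.08;                    // confidence interval width, as in the diff
            AxiomScore score = new AxiomScore(accuracy, confidence, total, success, total - success);
            System.out.println(score.getAccuracy() + " backed by "
                    + score.getNrOfpositiveExamples() + "/" + score.getTotalNrOfExamples()
                    + " examples (" + score.getNrOfnegativeExamples() + " negative)");
        }
    }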
From: <chr...@us...> - 2012-08-27 12:13:51
Revision: 3838 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3838&view=rev Author: christinaunger Date: 2012-08-27 12:13:42 +0000 (Mon, 27 Aug 2012) Log Message: ----------- extension of the final filtering process that throws out those templates that don't make sense Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.jj trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SlotType.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.java 2012-08-26 12:24:54 UTC (rev 3837) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.java 2012-08-27 12:13:42 UTC (rev 3838) @@ -504,6 +504,9 @@ else if (type.toString().equals("PROPERTY")) { slottype = SlotType.PROPERTY; } else if (type.toString().equals("SYMPROPERTY")) { slottype = SlotType.SYMPROPERTY; } else if (type.toString().equals("LITERAL")) { slottype = SlotType.LITERAL; } + else if (type.toString().equals("STRING")) { slottype = SlotType.STRING; } + else if (type.toString().equals("INTEGER")) { slottype = SlotType.INTEGER; } + else if (type.toString().equals("BOOLEAN")) { slottype = SlotType.BOOLEAN; } else { slottype = SlotType.UNSPEC; } {if (true) return new Slot(ref.toString(),slottype,words);} @@ -884,13 +887,18 @@ return false; } + private boolean jj_3_42() { + if (jj_scan_token(B)) return true; + return false; + } + private boolean jj_3_3() { if (jj_3R_3()) return true; return false; } - private boolean jj_3_42() { - if (jj_scan_token(B)) return true; + private boolean jj_3_40() { + if (jj_scan_token(C)) return true; return false; } @@ -910,11 +918,21 @@ return false; } - private boolean jj_3_40() { - if (jj_scan_token(C)) return true; + private boolean jj_3_41() { + if (jj_scan_token(A)) return true; return false; } + private boolean jj_3R_7() { + Token xsp; + xsp = jj_scanpos; + if (jj_3_41()) { + jj_scanpos = xsp; + if (jj_3_42()) return true; + } + return false; + } + private boolean jj_3R_16() { if (jj_3R_7()) return true; if (jj_scan_token(14)) return true; @@ -927,21 +945,6 @@ return false; } - private boolean jj_3_41() { - if (jj_scan_token(A)) return true; - return false; - } - - private boolean jj_3R_7() { - Token xsp; - xsp = jj_scanpos; - if (jj_3_41()) { - jj_scanpos = xsp; - if (jj_3_42()) return true; - } - return false; - } - private boolean jj_3_39() { if (jj_scan_token(A)) return true; return false; @@ -1131,18 +1134,18 @@ return false; } + private boolean jj_3_38() { + if (jj_scan_token(15)) return true; + if (jj_3R_13()) return true; + return false; + } + private boolean jj_3_7() { if (jj_3R_5()) return true; if (jj_scan_token(6)) return true; return false; } - private boolean jj_3_38() { - if (jj_scan_token(15)) return true; - if (jj_3R_13()) return true; - return false; - } - private boolean jj_3_37() { if (jj_3R_13()) return true; return false; Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.jj 
=================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.jj 2012-08-26 12:24:54 UTC (rev 3837) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.jj 2012-08-27 12:13:42 UTC (rev 3838) @@ -458,6 +458,9 @@ else if (type.toString().equals("PROPERTY")) { slottype = SlotType.PROPERTY; } else if (type.toString().equals("SYMPROPERTY")) { slottype = SlotType.SYMPROPERTY; } else if (type.toString().equals("LITERAL")) { slottype = SlotType.LITERAL; } + else if (type.toString().equals("STRING")) { slottype = SlotType.STRING; } + else if (type.toString().equals("INTEGER")) { slottype = SlotType.INTEGER; } + else if (type.toString().equals("BOOLEAN")) { slottype = SlotType.BOOLEAN; } else { slottype = SlotType.UNSPEC; } return new Slot(ref.toString(),slottype,words); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SlotType.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SlotType.java 2012-08-26 12:24:54 UTC (rev 3837) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SlotType.java 2012-08-27 12:13:42 UTC (rev 3838) @@ -1,6 +1,6 @@ package org.dllearner.algorithm.tbsl.sparql; public enum SlotType { - RESOURCE, CLASS, OBJECTPROPERTY, DATATYPEPROPERTY, PROPERTY, LITERAL, UNSPEC, + RESOURCE, CLASS, OBJECTPROPERTY, DATATYPEPROPERTY, PROPERTY, LITERAL, STRING, INTEGER, BOOLEAN, UNSPEC, SYMPROPERTY // TODO don't use them anymore } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java 2012-08-26 12:24:54 UTC (rev 3837) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java 2012-08-27 12:13:42 UTC (rev 3838) @@ -40,12 +40,12 @@ // check for clash (v=LITERAL && v=RESOURCE) for (Slot s : slots) { if ((s.words.get(0).equals(slot.words.get(0)) || s.anchor.equals(slot.words.get(0))) - && !s.type.equals(slot.type)) + && ((slot.type.equals(SlotType.RESOURCE) && isLiteral(s.type)) || (s.type.equals(SlotType.RESOURCE) && isLiteral(slot.type)))) // !s.type.equals(slot.type)) return null; } // check for clash (v=LITERAL && p(...,v)=OBJECTPROPERTY) || (v=RESOURCE && p(...,v)=DATATYPEPROPERTY) SlotType clashing = null; - if (slot.type.equals(SlotType.LITERAL)) clashing = SlotType.OBJECTPROPERTY; + if (isLiteral(slot.type)) clashing = SlotType.OBJECTPROPERTY; else if (slot.type.equals(SlotType.RESOURCE)) clashing = SlotType.DATATYPEPROPERTY; for (Slot s : slots) { if (clashing != null && s.type.equals(clashing)) { @@ -57,6 +57,27 @@ } } } + // check for clashes with FILTERS + for (SPARQL_Filter filter : query.filter) { + for (SPARQL_Pair ts : filter.getTerms()) { + if (ts.a.getName().equals(var) && (isIntegerType(ts.type) || ts.type.equals(SPARQL_PairType.REGEX))) { + // clash 1: counting a literal + for (SPARQL_Term sel : query.selTerms) { + if (sel.name.equals(var) && sel.aggregate.equals(SPARQL_Aggregate.COUNT)) + return null; + } + // clash 2: FILTER regex(?var,...) and FILTER (?var > ...) 
+ for (SPARQL_Filter f : query.filter) { + if (!f.equals(filter)) { + for (SPARQL_Pair p : f.getTerms()) { + if (p.a.name.equals(var) && (p.type.equals(SPARQL_PairType.REGEX) && isIntegerType(ts.type)) || (ts.type.equals(SPARQL_PairType.REGEX) && isIntegerType(p.type))) + return null; + } + } + } + } + } + } } for (Slot slot : slots) { @@ -65,7 +86,7 @@ for (SPARQL_Triple triple : query.conditions) { if (triple.property.toString().equals("rdf:type") && triple.value.toString().equals("?"+slot.anchor)) { for (Slot s : argslots) { - if (s.words.contains(triple.variable.toString().replace("?","")) && s.type.equals(SlotType.LITERAL)) + if (s.words.contains(triple.variable.toString().replace("?","")) && isLiteral(s.type)) return null; } } @@ -81,7 +102,7 @@ for (String arg : args) { for (Slot s : argslots) { if (s.words.contains(arg.replace("?",""))) { - if (s.type.equals(SlotType.LITERAL)) slot.type = SlotType.DATATYPEPROPERTY; + if (isLiteral(s.type)) slot.type = SlotType.DATATYPEPROPERTY; else if (s.type.equals(SlotType.RESOURCE)) slot.type = SlotType.OBJECTPROPERTY; } } @@ -111,8 +132,29 @@ } slots = keep; + // additionally, filter out those templates that count a var that does not occur in the triples + // (these templates should not be built in the first place, but they are...) + for (SPARQL_Term t : query.selTerms) { + if (t.aggregate.equals(SPARQL_Aggregate.COUNT)) { + String v = t.name; + boolean fine = false; + for (SPARQL_Triple triple : query.conditions) { + if ((triple.variable.name.equals(v) || triple.value.name.equals(v))) { + fine = true; break; + } + } + if (!fine) return null; + } + } + return this; } + private boolean isLiteral(SlotType st) { + return st.equals(SlotType.STRING) || st.equals(SlotType.INTEGER) || st.equals(SlotType.LITERAL); + } + private boolean isIntegerType(SPARQL_PairType p) { + return p.equals(SPARQL_PairType.GT) || p.equals(SPARQL_PairType.LT) || p.equals(SPARQL_PairType.GTEQ) || p.equals(SPARQL_PairType.LTEQ); + } public String toString() { Modified: trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex =================================================================== --- trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex 2012-08-26 12:24:54 UTC (rev 3837) +++ trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex 2012-08-27 12:13:42 UTC (rev 3838) @@ -31,7 +31,7 @@ below .+ pounds || (NP NP* (PP P:'below' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> below .+ pounds || (PP P:'below' (NP NUM[num] N:'pounds')) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> from .+ to .+ pounds || (NP NP* (PP P:'from' NUM[num1] P:'to' NUM[num2] N:'pounds')) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), greaterorequal(y,n1), lessorequal(y,n2) ] ], [ (l2,n1,num1,e),(l3,n2,num2,e) ], [ l2=l1,l3=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> - between .+ and .+ pounds || (NP NP* (PP P:'between' NUM[num1] P:'and' NUM[num2] N:'pounds')) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), greaterorequal(y,n1), lessorequal(y,n2) ] ], [ (l2,n1,num1,e),(l3,n2,num2,e) ], [ l2=l1,l3=l1 ],[ SLOT_price/DATATYPEPROPERTY/prices ]> + between .+ and .+ pounds || (NP NP* (PP P:'between' NUM[num1] P:'and' NUM[num2] N:'pounds')) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), greaterorequal(y,n1), lessorequal(y,n2) ] ], [ (l2,n1,num1,e),(l3,n2,num2,e) ], [ l2=l1,l3=l1 ],[ 
SLOT_price/DATATYPEPROPERTY/price ]> with || (NP NP* (PP P:'with' DP[dp])) || <x,l1,<e,t>, [ l1:[ | empty(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[]> ;; <x,l1,<e,t>, [ l1:[ z | SLOT_description(x,z), regextoken(z,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_description/DATATYPEPROPERTY/description, SLOT_arg/LITERAL/z ]> with || (PP P:'with' DP[dp]) || <x,l1,<e,t>, [ l1:[ | empty(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[]> ;; <x,l1,<e,t>, [ l1:[ z | SLOT_description(x,z), regextoken(z,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_description/DATATYPEPROPERTY/description, SLOT_arg/LITERAL/z ]> square meters || (NP N:'square' N:'meters') || <x,l1,<e,t>, [l1:[ | SLOT_size(x,y) ]], [],[],[SLOT_size/DATATYPEPROPERTY/size ]>
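One of the clash checks added to Template.checkandrefine in r3838 can be read in isolation: a variable constrained both by a regex FILTER (string-typed) and by a numeric comparison FILTER (>, <, >=, <=) can never be satisfied, so such templates are dropped. A reduced sketch with hypothetical filter objects, not the project's SPARQL_Filter API:

    import java.util.List;

    // Reduced sketch of the regex-vs-numeric FILTER clash rule above.
    public class FilterClashExample {

        enum FilterKind { REGEX, GT, LT, GTEQ, LTEQ }
        record Filter(String variable, FilterKind kind) {}

        static boolean isNumeric(FilterKind k) {
            return k == FilterKind.GT || k == FilterKind.LT
                    || k == FilterKind.GTEQ || k == FilterKind.LTEQ;
        }

        static boolean hasClash(List<Filter> filters) {
            for (Filter a : filters) {
                for (Filter b : filters) {
                    if (a.variable().equals(b.variable())
                            && a.kind() == FilterKind.REGEX && isNumeric(b.kind())) {
                        return true; // same variable used as string and as number
                    }
                }
            }
            return false;
        }

        public static void main(String[] args) {
            System.out.println(hasClash(List.of(new Filter("price", FilterKind.REGEX),
                                                new Filter("price", FilterKind.GT)))); // true
        }
    }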
From: <chr...@us...> - 2012-08-26 12:25:06
Revision: 3837 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3837&view=rev Author: christinaunger Date: 2012-08-26 12:24:54 +0000 (Sun, 26 Aug 2012) Log Message: ----------- very minor things... Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/TemplatorHandler.java trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TestFrontend.java Property Changed: ---------------- trunk/components-ext/src/main/resources/ Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java 2012-08-25 17:06:30 UTC (rev 3836) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java 2012-08-26 12:24:54 UTC (rev 3837) @@ -163,7 +163,7 @@ if (token.matches(anchor)) { foundCandidates = true; - coveredTokens.add(token); + coveredTokens.add(anchor.replace(".+","")); // DISAM String[] newTokenParts = new String[tokenParts.length]; @@ -243,7 +243,7 @@ buildSlotFor.add(new Pair<String,String>(word,s.trim().substring(s.indexOf("/")+1))); doubles.add(word); } else { - System.out.println("Oh no, " + s + " has no POS tag!"); + logger.error("Oh no, " + s + " has no POS tag!"); } } if (VERBOSE) logger.trace("build slot for: " + buildSlotFor + "\n"); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/TemplatorHandler.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/TemplatorHandler.java 2012-08-25 17:06:30 UTC (rev 3836) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/TemplatorHandler.java 2012-08-26 12:24:54 UTC (rev 3837) @@ -17,6 +17,10 @@ basictemplator.setGrammarFiles(files); } + public void setVerbose(boolean b) { + templator.setVERBOSE(b); + } + public Set<Template> buildTemplates(String s) { return templator.buildTemplates(s); } Property changes on: trunk/components-ext/src/main/resources ___________________________________________________________________ Added: svn:ignore + .log4j.properties.swp Modified: trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex =================================================================== --- trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex 2012-08-25 17:06:30 UTC (rev 3836) +++ trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex 2012-08-26 12:24:54 UTC (rev 3837) @@ -15,27 +15,32 @@ in walking distance from || (PP P:'in' (NP N:'walking' N:'distance' P:'from' DP[dp])) || <x,l1,<e,t>, [ l1:[ | SLOT_near(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_near/OBJECTPROPERTY/at_walking_distance ]> at walking distance from || (NP NP* (PP P:'at' (NP N:'walking' N:'distance' P:'from' DP[dp]))) || <x,l1,<e,t>, [ l1:[ | SLOT_near(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_near/OBJECTPROPERTY/at_walking_distance ]> at walking distance from || (PP P:'at' (NP N:'walking' N:'distance' P:'from' DP[dp])) || <x,l1,<e,t>, [ l1:[ | SLOT_near(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_near/OBJECTPROPERTY/at_walking_distance ]> - in the area || (NP NP* (PP P:'in' (DP 
DET:'the' (NP N:'area' DP[dp])))) || <x,l1,<e,t>, [ l1:[ | SLOT_near(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_near/OBJECTPROPERTY/near]> + in the area || (NP NP* (PP P:'in' (DP DET:'the' (NP N:'area' DP[dp])))) || <x,l1,<e,t>, [ l1:[ | SLOT_near(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_near/OBJECTPROPERTY/near]> + in the area of || (NP NP* (PP P:'in' (DP DET:'the' (NP N:'area' (PP P:'of' DP[dp]))))) || <x,l1,<e,t>, [ l1:[ | SLOT_near(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_near/OBJECTPROPERTY/near]> in || (NP NP* (PP P:'in' DP[dp])) || <x,l1,<e,t>, [ l1:[ | SLOT_location(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_location/PROPERTY/location^city^postal_code^address^street ]> on || (NP NP* (PP P:'on' DP[dp])) || <x,l1,<e,t>, [ l1:[ | SLOT_location(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_location/PROPERTY/location^city^postal_code^address^street ]> since || (NP NP* (PP P:'since' DP[dp])) || <x,l1,<e,t>, [ l1:[ | SLOT_since(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_since/PROPERTY/since ]> - for .+ pounds || (NP NP* (PP P:'for' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_includes(v,x), SLOT_price(v,y) ] ], [ (l2,y,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price, SLOT_includes/OBJECTPROPERTY/includes ]> - for more than .+ pounds || (NP NP* (PP P:'for' DET:'more' DET:'than' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_includes(v,x), SLOT_price(v,y), greater(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price, SLOT_includes/OBJECTPROPERTY/includes ]> - for less than .+ pounds || (NP NP* (PP P:'for' DET:'less' DET:'than' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_includes(v,x), SLOT_price(v,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price, SLOT_includes/OBJECTPROPERTY/includes ]> - for less than || (NP NP* (PP P:'for' DET:'less' DET:'than' NUM[num])) || <x,l1,<e,t>, [ l1:[v | SLOT_includes(v,x), SLOT_price(v,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price, SLOT_includes/OBJECTPROPERTY/includes ]> - for more than || (NP NP* (PP P:'for' DET:'more' DET:'than' NUM[num])) || <x,l1,<e,t>, [ l1:[v | SLOT_includes(v,x), SLOT_price(v,y), greater(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price, SLOT_includes/OBJECTPROPERTY/includes ]> - cheaper than .+ pounds || (NP NP* (ADJ ADJ:'cheaper' DET:'than' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_includes(v,x), SLOT_price(v,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price, SLOT_includes/OBJECTPROPERTY/includes ]> - cheaper than .+ pounds || (ADJ ADJ:'cheaper' DET:'than' (NP NUM[num] N:'pounds')) || <x,l1,<e,t>, [ l1:[ | SLOT_includes(v,x), SLOT_price(v,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price, SLOT_includes/OBJECTPROPERTY/includes ]> - below .+ pounds || (NP NP* (PP P:'below' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_includes(v,x), SLOT_price(v,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price, SLOT_includes/OBJECTPROPERTY/includes ]> - below .+ pounds || (PP P:'below' (NP NUM[num] N:'pounds')) || <x,l1,<e,t>, [ l1:[ | SLOT_includes(v,x), SLOT_price(v,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price, SLOT_includes/OBJECTPROPERTY/includes ]> - from .+ to .+ pounds || (NP NP* (PP P:'from' NUM[num1] P:'to' NUM[num2] N:'pounds')) || <x,l1,<e,t>, [ l1:[ | SLOT_includes(v,x), 
SLOT_price(v,y), greaterorequal(y,n1), lessorequal(y,n2) ] ], [ (l2,n1,num1,e),(l3,n2,num2,e) ], [ l2=l1,l3=l1 ],[ SLOT_price/DATATYPEPROPERTY/price, SLOT_includes/OBJECTPROPERTY/includes ]> - between .+ and .+ pounds || (NP NP* (PP P:'between' NUM[num1] P:'and' NUM[num2] N:'pounds')) || <x,l1,<e,t>, [ l1:[ | SLOT_includes(v,x), SLOT_price(v,y), greaterorequal(y,n1), lessorequal(y,n2) ] ], [ (l2,n1,num1,e),(l3,n2,num2,e) ], [ l2=l1,l3=l1 ],[ SLOT_price/DATATYPEPROPERTY/price, SLOT_includes/OBJECTPROPERTY/includes ]> + for .+ pounds || (NP NP* (PP P:'for' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y) ] ], [ (l2,y,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> + for more than .+ pounds || (NP NP* (PP P:'for' DET:'more' DET:'than' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), greater(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> + for less than .+ pounds || (NP NP* (PP P:'for' DET:'less' DET:'than' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> + for less than || (NP NP* (PP P:'for' DET:'less' DET:'than' NUM[num])) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> + for more than || (NP NP* (PP P:'for' DET:'more' DET:'than' NUM[num])) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), greater(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> + cheaper than .+ pounds || (NP NP* (ADJ ADJ:'cheaper' DET:'than' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1: [ | SLOT_price(x,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> + cheaper than .+ pounds || (ADJ ADJ:'cheaper' DET:'than' (NP NUM[num] N:'pounds')) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> + below .+ pounds || (NP NP* (PP P:'below' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> + below .+ pounds || (PP P:'below' (NP NUM[num] N:'pounds')) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> + from .+ to .+ pounds || (NP NP* (PP P:'from' NUM[num1] P:'to' NUM[num2] N:'pounds')) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), greaterorequal(y,n1), lessorequal(y,n2) ] ], [ (l2,n1,num1,e),(l3,n2,num2,e) ], [ l2=l1,l3=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> + between .+ and .+ pounds || (NP NP* (PP P:'between' NUM[num1] P:'and' NUM[num2] N:'pounds')) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), greaterorequal(y,n1), lessorequal(y,n2) ] ], [ (l2,n1,num1,e),(l3,n2,num2,e) ], [ l2=l1,l3=l1 ],[ SLOT_price/DATATYPEPROPERTY/prices ]> - with || (NP NP* (PP P:'with' DP[dp])) || <x,l1,<e,t>, [ l1:[ | empty(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[]> ;; <x,l1,<e,t>, [ l1:[ | SLOT_description(x,z), regextoken(z,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_description/DATATYPEPROPERTY/description, SLOT_arg/LITERAL/z ]> - with || (PP P:'with' DP[dp]) || <x,l1,<e,t>, [ l1:[ | empty(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[]> ;; <x,l1,<e,t>, [ l1:[ | SLOT_description(x,z), regextoken(z,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_description/DATATYPEPROPERTY/description, SLOT_arg/LITERAL/z ]> + with || (NP NP* (PP P:'with' DP[dp])) || <x,l1,<e,t>, [ l1:[ | empty(x,y) ] ], [ 
(l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[]> ;; <x,l1,<e,t>, [ l1:[ z | SLOT_description(x,z), regextoken(z,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_description/DATATYPEPROPERTY/description, SLOT_arg/LITERAL/z ]> square meters || (NP N:'square' N:'meters') || <x,l1,<e,t>, [l1:[ | SLOT_size(x,y) ]], [],[],[SLOT_size/DATATYPEPROPERTY/size ]> +// ADJECTIVES + + brand new || (NP ADJ:'brand' ADJ:'new' NP*) || <x,l1,<e,t>, [ l1:[ | ] ], [], [],[]> // MONTHS january || (DP DP:'january') || <x,l1,<<e,t>,t>, [ l1:[ x | xsd:month(x,1) ]], [],[],[]> Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TestFrontend.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TestFrontend.java 2012-08-25 17:06:30 UTC (rev 3836) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TestFrontend.java 2012-08-26 12:24:54 UTC (rev 3837) @@ -17,6 +17,7 @@ public static void main(String[] args) { TemplatorHandler handler = new TemplatorHandler(GRAMMAR_FILES); + handler.setVerbose(true); System.out.println("======= SPARQL Templator v0.1 ============="); System.out.print("\nMode: ");
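For illustration, a usage sketch of the verbose switch added to TemplatorHandler in r3837. The constructor argument and buildTemplates are as shown in the diffs above; the grammar file path and the question string are made-up examples:

    import java.util.Set;
    import org.dllearner.algorithm.tbsl.sparql.Template;
    import org.dllearner.algorithm.tbsl.templator.TemplatorHandler;

    // Usage sketch: setVerbose(true) forwards to the wrapped Templator.
    public class VerboseTemplatorExample {
        public static void main(String[] args) {
            String[] grammarFiles = { "tbsl/lexicon/english.lex" }; // example path
            TemplatorHandler handler = new TemplatorHandler(grammarFiles);
            handler.setVerbose(true);
            Set<Template> templates = handler.buildTemplates("flats in Oxford below 1000 pounds");
            System.out.println(templates.size() + " templates built");
        }
    }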
From: <lor...@us...> - 2012-08-25 17:06:36
Revision: 3836 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3836&view=rev Author: lorenz_b Date: 2012-08-25 17:06:30 +0000 (Sat, 25 Aug 2012) Log Message: ----------- Updated URL params. Modified Paths: -------------- trunk/interfaces/src/main/java/org/dllearner/server/EnrichmentServlet.java Modified: trunk/interfaces/src/main/java/org/dllearner/server/EnrichmentServlet.java =================================================================== --- trunk/interfaces/src/main/java/org/dllearner/server/EnrichmentServlet.java 2012-08-23 14:35:33 UTC (rev 3835) +++ trunk/interfaces/src/main/java/org/dllearner/server/EnrichmentServlet.java 2012-08-25 17:06:30 UTC (rev 3836) @@ -203,8 +203,8 @@ final boolean useInference = req.getParameter("use_inference") == null ? false : Boolean.valueOf(req .getParameter("use_inference")); - final int maxNrOfReturnedAxioms = req.getParameter("maxNrOfReturnedAxioms") == null ? DEFAULT_MAX_NR_OF_RETURNED_AXIOMS : Integer.parseInt(req.getParameter("maxNrOfReturnedAxioms")); - final int maxExecutionTimeInSeconds = req.getParameter("maxExecutionTimeInSeconds") == null ? DEFAULT_MAX_EXECUTION_TIME_IN_SECONDS : Integer.parseInt(req.getParameter("maxExecutionTimeInSeconds")); + final int maxNrOfReturnedAxioms = req.getParameter("max_returned_axioms") == null ? DEFAULT_MAX_NR_OF_RETURNED_AXIOMS : Integer.parseInt(req.getParameter("max_returned_axioms")); + final int maxExecutionTimeInSeconds = req.getParameter("max_execution_time") == null ? DEFAULT_MAX_EXECUTION_TIME_IN_SECONDS : Integer.parseInt(req.getParameter("max_execution_time")); final double threshold = req.getParameter("threshold") == null ? DEFAULT_THRESHOLD : Double.parseDouble(req.getParameter("threshold")); String resourceURI = req.getParameter("resource_uri");
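The renamed parameters all follow the same read-with-default idiom; repeated ternaries like these can be factored into a small helper. A hypothetical refactoring for illustration, not code from the repository:

    import javax.servlet.http.HttpServletRequest;

    // Hypothetical helper factoring out the parameter-with-default idiom used
    // in EnrichmentServlet above; not part of the actual codebase.
    public final class RequestParams {
        private RequestParams() {}

        static int intParam(HttpServletRequest req, String name, int fallback) {
            String value = req.getParameter(name);
            return value == null ? fallback : Integer.parseInt(value);
        }

        static double doubleParam(HttpServletRequest req, String name, double fallback) {
            String value = req.getParameter(name);
            return value == null ? fallback : Double.parseDouble(value);
        }
    }

    // e.g. int maxAxioms = RequestParams.intParam(req, "max_returned_axioms", DEFAULT_MAX_NR_OF_RETURNED_AXIOMS);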
From: <ki...@us...> - 2012-08-23 14:35:44
Revision: 3835
http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3835&view=rev
Author: kirdie
Date: 2012-08-23 14:35:33 +0000 (Thu, 23 Aug 2012)

Log Message:
-----------
more work on the oxford test.

Modified Paths:
--------------
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java
    trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/QueryTestData.java
    trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java

Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java	2012-08-23 13:19:03 UTC (rev 3834)
+++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java	2012-08-23 14:35:33 UTC (rev 3835)
@@ -911,6 +911,7 @@
 //			return 0;
 //		}
 //		return Math.log(cnt);
+		if(Double.isNaN(popularity)) {throw new AssertionError("prominence NaN for uri "+uri+", slot type "+type);}
 		return popularity;
 	}

Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/QueryTestData.java
===================================================================
--- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/QueryTestData.java	2012-08-23 13:19:03 UTC (rev 3834)
+++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/QueryTestData.java	2012-08-23 14:35:33 UTC (rev 3835)
@@ -33,6 +33,7 @@
 import org.w3c.dom.Element;
 import org.w3c.dom.NodeList;
 import org.xml.sax.SAXException;
+import com.hp.hpl.jena.rdf.model.Model;
 
 class QueryTestData implements Serializable
 {
@@ -68,12 +69,12 @@
 		catch (ClassNotFoundException e){throw new RuntimeException(e);}
 	}
 
-	public QueryTestData generateAnswers(SparqlEndpoint endpoint, ExtractionDBCache cache)
+	public QueryTestData generateAnswers(SparqlEndpoint endpoint, ExtractionDBCache cache,Model model)
 	{
 		if(!id2Answers.isEmpty()) {throw new AssertionError("Answers already existing.");}
 		for(int i:id2Query.keySet())
 		{
-			Set<String> uris = SPARQLTemplateBasedLearner3Test.getUris(endpoint, id2Query.get(i),cache);
+			Set<String> uris = SPARQLTemplateBasedLearner3Test.getUris(endpoint, id2Query.get(i),cache,model);
 			id2Answers.put(i, uris); // empty answer set better transfers intended meaning and doesn't cause NPEs in html generation :-)
 			if(!uris.isEmpty()) {/*id2Answers.put(i, uris);*/}
 			else {id2LearnStatus.put(i, LearnStatus.QUERY_RESULT_EMPTY);}

Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java
===================================================================
--- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java	2012-08-23 13:19:03 UTC (rev 3834)
+++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java	2012-08-23 14:35:33 UTC (rev 3835)
@@ -50,6 +50,7 @@
 import org.apache.log4j.SimpleLayout;
 import org.dllearner.algorithm.tbsl.ltag.parser.Parser;
 import org.dllearner.algorithm.tbsl.nlp.PartOfSpeechTagger;
+import org.dllearner.algorithm.tbsl.nlp.StanfordPartOfSpeechTagger;
 import org.dllearner.algorithm.tbsl.nlp.SynchronizedStanfordPartOfSpeechTagger;
 import org.dllearner.algorithm.tbsl.nlp.WordNet;
 import org.dllearner.algorithm.tbsl.templator.Templator;
@@ -70,8 +71,11 @@
 import org.w3c.dom.NodeList;
 import org.xml.sax.SAXException;
 import cern.colt.Arrays;
+import com.hp.hpl.jena.query.QueryExecutionFactory;
+import com.hp.hpl.jena.query.QueryFactory;
 import com.hp.hpl.jena.query.QuerySolution;
 import com.hp.hpl.jena.query.ResultSet;
+import com.hp.hpl.jena.query.Syntax;
 import com.hp.hpl.jena.rdf.model.Model;
 import com.hp.hpl.jena.rdf.model.ModelFactory;
 import com.hp.hpl.jena.rdf.model.RDFNode;
@@ -104,17 +108,25 @@
 	{test("QALD 2 Benchmark ideally tagged", new File(getClass().getClassLoader().getResource("tbsl/evaluation/qald2-dbpedia-train-tagged(ideal).xml").getFile()),
 			SparqlEndpoint.getEndpointDBpedia(),dbpediaLiveCache);}
 
+	/*@Test*/ public void testOxford() throws Exception
+	{
+	}
+
 	@Test public void generateXMLOxford() throws IOException
 	{
-		Model m = loadOxfordModel();
+		boolean ADD_POS_TAGS = false;
+		PartOfSpeechTagger posTagger = new StanfordPartOfSpeechTagger();
+		Model model = loadOxfordModel();
 		List<String> questions = new LinkedList<String>();
 		BufferedReader in = new BufferedReader((new InputStreamReader(getClass().getClassLoader().getResourceAsStream("tbsl/oxford_eval_queries.txt"))));
 		int j=0;
 		for(String line;(line=in.readLine())!=null;)
 		{
 			j++;
-			if(j>1) break;
-			if(!line.isEmpty()) {questions.add(line.replace("question: ", ""));}
+			// if(j>5) break; // TODO: remove later
+			String question = line.replace("question: ", "").trim();
+			if(ADD_POS_TAGS&&!OXFORD_PRETAGGED) {question = posTagger.tag(question);}
+			if(!line.trim().isEmpty()) {questions.add(question);}
 		}
 		in.close();
 		SortedMap<Integer,String> id2Question = new TreeMap<Integer, String>();
@@ -126,7 +138,8 @@
 				SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_dataproperty_mappings.txt").getPath(),
 				SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_objectproperty_mappings.txt").getPath()
 				);
-		QueryTestData testData = generateTestDataMultiThreaded(id2Question, null,m,mappingIndex,OXFORD_PRETAGGED);
+		QueryTestData testData = generateTestDataMultiThreaded(id2Question, null,model,mappingIndex,ADD_POS_TAGS||OXFORD_PRETAGGED);
+		testData.generateAnswers(null, null, model);
 		testData.writeQaldXml(new File("log/test.xml"));
 	}
@@ -175,7 +188,7 @@
 			String type = (ending.equals("ttl")||ending.equals("nt"))?"TURTLE":ending.equals("owl")?"RDF/XML":String.valueOf(Integer.valueOf("filetype "+ending+" not handled."));
 			//			switch(type)	{case "ttl":type="TURTLE";break;case "owl":type="RDF/XML";break;default:throw new RuntimeException("filetype "+ending+" not handled.");} // no Java 1.7 :-(
 			try{
-//				m.read(new FileInputStream(new File("/home/lorenz/arbeit/papers/question-answering-iswc-2012/data/"+s)), null, type);}catch (FileNotFoundException e) {}
+				//				m.read(new FileInputStream(new File("/home/lorenz/arbeit/papers/question-answering-iswc-2012/data/"+s)), null, type);}catch (FileNotFoundException e) {}
 				m.read(getClass().getClassLoader().getResourceAsStream("oxford/"+s),null, type);}
 			catch(RuntimeException e) {throw new RuntimeException("Could not read into model: "+s,e);}
 		}
@@ -241,7 +254,7 @@
 		if(!updatedReferenceXML.exists())
 		{
 			logger.info("Generating updated reference for "+title);
-			generateUpdatedXML(referenceXML,updatedReferenceXML,endpoint,cache);
+			generateUpdatedXML(referenceXML,updatedReferenceXML,endpoint,cache,null);
 		}
 
 		QueryTestData referenceTestData = QueryTestData.readQaldXml(updatedReferenceXML);
@@ -251,7 +264,7 @@
 		QueryTestData learnedTestData = generateTestDataMultiThreaded(referenceTestData.id2Question, dbpediaLiveKnowledgebase,null,null,DBPEDIA_PRETAGGED);
 		long endLearning = System.currentTimeMillis();
 		logger.info("finished learning after "+(endLearning-startLearning)/1000.0+"s");
-		learnedTestData.generateAnswers(endpoint,cache);
+		learnedTestData.generateAnswers(endpoint,cache,null);
 		long endGeneratingAnswers = System.currentTimeMillis();
 		logger.info("finished generating answers in "+(endGeneratingAnswers-endLearning)/1000.0+"s");
 		Evaluation evaluation = evaluate(referenceTestData, learnedTestData);
@@ -259,6 +272,11 @@
 		evaluation.write();
 	}
 
+	private void evaluateAndWrite()
+	{
+
+	}
+
 	/** evaluates a data set against a reference.
 	 * @param reference the test data assumed to be correct. needs to contain the answers for all queries.
 	 * @param suspect the test data to compare with the reference.
@@ -542,7 +560,8 @@
 			}
 			catch (ExecutionException e)
 			{
-				throw new RuntimeException("question="+question,e);
+				testData.id2LearnStatus.put(i, new LearnStatus(LearnStatus.Type.EXCEPTION, e));
+				//throw new RuntimeException("question="+question,e);
 			}
 			catch (TimeoutException e)
 			{
@@ -590,7 +609,7 @@
 	 * @throws SAXException
 	 * @throws TransformerException
 	 */
-	private void generateUpdatedXML(File originalFile, File updatedFile,SparqlEndpoint endpoint, ExtractionDBCache cache) throws ParserConfigurationException, SAXException, IOException, TransformerException
+	private void generateUpdatedXML(File originalFile, File updatedFile,SparqlEndpoint endpoint, ExtractionDBCache cache,Model model) throws ParserConfigurationException, SAXException, IOException, TransformerException
 	{
 		logger.info(String.format("Updating question file \"%s\" by removing questions without nonempty resource list answer and adding answers.\n" +
 				" Saving the result to file \"%s\"",originalFile.getPath(),updatedFile.getPath()));
@@ -629,7 +648,7 @@
 				if(!query.equals("OUT OF SCOPE")) // marker in qald benchmark file, will create holes interval of ids (e.g. 1,2,5,7)
 				{
-					Set<String> uris = getUris(endpoint, query,cache);
+					Set<String> uris = getUris(endpoint, query,cache,model);
 					if(!uris.isEmpty())
 					{
 						// remove reference answers of the benchmark because they are obtained from an other endpoint
@@ -687,7 +706,7 @@
 
 //	private SPARQLTemplateBasedLearner2 oxfordLearner;
 //	private SPARQLTemplateBasedLearner2 dbpediaLiveLearner;
 
-//	private final ExtractionDBCache oxfordCache = new ExtractionDBCache("cache");
+	// private final ExtractionDBCache oxfordCache = new ExtractionDBCache("cache");
 	private final static ExtractionDBCache dbpediaLiveCache = new ExtractionDBCache("cache");
 	private final Knowledgebase dbpediaLiveKnowledgebase = createDBpediaLiveKnowledgebase(dbpediaLiveCache);
@@ -725,7 +744,7 @@
 		Logger.getRootLogger().setLevel(Level.WARN);
 		Logger.getLogger(Templator.class).setLevel(Level.WARN);
 		Logger.getLogger(Parser.class).setLevel(Level.WARN);
-		Logger.getLogger(SPARQLTemplateBasedLearner2.class).setLevel(Level.INFO);
+		Logger.getLogger(SPARQLTemplateBasedLearner2.class).setLevel(Level.WARN); // Logger.getLogger(SPARQLTemplateBasedLearner2.class).setLevel(Level.INFO);
 		logger.setLevel(Level.INFO); // TODO: remove when finishing implementation of this class
 		logger.addAppender(new FileAppender(new SimpleLayout(), "log/"+this.getClass().getSimpleName()+".log", false));
@@ -734,16 +753,21 @@
 
 //		oxfordLearner = new SPARQLTemplateBasedLearner2(createOxfordKnowledgebase(oxfordCache));
 	}
 
-	public static Set<String> getUris(final SparqlEndpoint endpoint, final String query, ExtractionDBCache cache)
+	public static Set<String> getUris(final SparqlEndpoint endpoint, final String query, ExtractionDBCache cache, Model model)
 	{
 		if(query==null)	{throw new AssertionError("query is null");}
-		if(endpoint==null)	{throw new AssertionError("endpoint is null");}
+//		if(endpoint==null)	{throw new AssertionError("endpoint is null");}
 		if(!query.contains("SELECT")&&!query.contains("select")) {return Collections.<String>emptySet();} // abort when not a select query
 		Set<String> uris = new HashSet<String>();
 //		QueryEngineHTTP qe = new QueryEngineHTTP(DBPEDIA_LIVE_ENDPOINT_URL_STRING, query);
+
 		ResultSet rs;
 //		try{rs = qe.execSelect();}
-		try{rs = executeSelect(endpoint, query, cache);}
+		try
+		{
+			if(model!=null)	{rs = QueryExecutionFactory.create(QueryFactory.create(query, Syntax.syntaxARQ), model).execSelect();}
+			else {rs = executeSelect(endpoint, query, cache);}
+		}
 		catch(QueryExceptionHTTP e)
 		{
 			logger.error("Error getting uris for query "+query+" at endpoint "+endpoint,e);
@@ -780,7 +804,7 @@
 		}
 		return uris;
 	}
-	
+
 	private static String urlDecode(String url){
 		String decodedURL = null;
 		try {
@@ -836,7 +860,7 @@
 	static private class POSTaggerHolder
 	{static public final PartOfSpeechTagger posTagger = new SynchronizedStanfordPartOfSpeechTagger();}
-	
+
 	static private final WordNet wordnet = new WordNet();
 	static private final Options options = new Options();
 	private final SPARQLTemplateBasedLearner2 learner;
@@ -848,6 +872,7 @@
 			this.testData=testData;
 			learner = new SPARQLTemplateBasedLearner2(knowledgeBase,pretagged?null:POSTaggerHolder.posTagger,wordnet,options);
 			try {learner.init();} catch (ComponentInitException e) {throw new RuntimeException(e);}
+			learner.setUseIdealTagger(pretagged);
 		}
 
 		public LearnQueryCallable(String question, int id, QueryTestData testData, Model model,MappingBasedIndex index,boolean pretagged)
@@ -861,16 +886,18 @@
 					SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_dataproperty_mappings.txt").getPath(),
 					SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_objectproperty_mappings.txt").getPath()
 					);
-			
+
 			learner = new SPARQLTemplateBasedLearner2(model,mappingIndex,pretagged?null:POSTaggerHolder.posTagger);
 			try {learner.init();} catch (ComponentInitException e) {throw new RuntimeException(e);}
+			learner.setUseIdealTagger(pretagged);
 			learner.setGrammarFiles(new String[]{"tbsl/lexicon/english.lex","tbsl/lexicon/english_oxford.lex"});
 			learner.setUseDomainRangeRestriction(false);
 		}
 
 		@Override public LearnStatus call()
-		{
+		{
+			logger.trace("learning question: "+question);
 			try
 			{

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
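
The substantive change in this revision is the new branch in getUris(): when a Jena Model is passed in, the SELECT query is evaluated against that in-memory model via QueryExecutionFactory instead of being sent to a remote SPARQL endpoint, which is what lets the Oxford test data run without a live endpoint. Below is a minimal, self-contained sketch of that pattern using the same legacy Jena API seen in the diff; the class name, helper method, and the empty demo model are illustrative only, not taken from the commit.

import java.util.HashSet;
import java.util.Set;
import com.hp.hpl.jena.query.Query;
import com.hp.hpl.jena.query.QueryExecution;
import com.hp.hpl.jena.query.QueryExecutionFactory;
import com.hp.hpl.jena.query.QueryFactory;
import com.hp.hpl.jena.query.QuerySolution;
import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.query.Syntax;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.rdf.model.RDFNode;

public class LocalModelSelectExample
{
	/** Runs a SELECT query against an in-memory model and collects the URIs
	 *  bound to the first result variable (illustrative stand-in for getUris). */
	public static Set<String> selectUris(Model model, String queryString)
	{
		Set<String> uris = new HashSet<String>();
		Query query = QueryFactory.create(queryString, Syntax.syntaxARQ);
		QueryExecution qe = QueryExecutionFactory.create(query, model);
		try
		{
			ResultSet rs = qe.execSelect();
			String var = rs.getResultVars().get(0);
			while(rs.hasNext())
			{
				QuerySolution qs = rs.next();
				RDFNode node = qs.get(var);
				// keep only URI resources, skipping literals and blank nodes
				if(node!=null&&node.isURIResource()) {uris.add(node.asResource().getURI());}
			}
		}
		finally {qe.close();}
		return uris;
	}

	public static void main(String[] args)
	{
		Model model = ModelFactory.createDefaultModel();
		// model.read(...) would load the Oxford dataset here
		System.out.println(selectUris(model, "SELECT ?s WHERE {?s ?p ?o} LIMIT 10"));
	}
}

Since a local query involves no HTTP round trip, the QueryExceptionHTTP failure mode does not apply to this branch, which presumably also explains why the endpoint null check could be commented out.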
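The other notable change is in collecting results from the learning threads: an ExecutionException thrown by a task is now recorded as a LearnStatus for the affected question id rather than rethrown, so one failing question no longer aborts the whole evaluation run. A rough, self-contained sketch of this record-and-continue pattern follows, with a stand-in Status enum and map in place of the project's LearnStatus bookkeeping:

import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

public class RecordFailuresExample
{
	enum Status {OK, EXCEPTION, TIMEOUT}

	public static void main(String[] args) throws InterruptedException
	{
		Map<Integer,Status> id2Status = new ConcurrentHashMap<Integer,Status>();
		ExecutorService pool = Executors.newFixedThreadPool(4);
		for(int i=0;i<3;i++)
		{
			final int id = i;
			Future<Status> future = pool.submit(new Callable<Status>()
			{
				@Override public Status call()
				{
					if(id==1) {throw new IllegalStateException("simulated failure");}
					return Status.OK;
				}
			});
			try {id2Status.put(id, future.get(10, TimeUnit.SECONDS));}
			// record the failure for this id instead of rethrowing and killing the run
			catch(ExecutionException e) {id2Status.put(id, Status.EXCEPTION);}
			catch(TimeoutException e) {id2Status.put(id, Status.TIMEOUT);}
		}
		pool.shutdown();
		System.out.println(id2Status);
	}
}

The upside of this design is that a batch over many questions always completes and the per-question failure cause survives into the written report; the trade-off is that exceptions are no longer loud, so the status map has to be inspected afterwards.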