From: <ki...@us...> - 2012-09-19 15:16:08
|
Revision: 3846 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3846&view=rev Author: kirdie Date: 2012-09-19 15:15:56 +0000 (Wed, 19 Sep 2012) Log Message: ----------- started implementing using hmm for disambiguation Modified Paths: -------------- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java Modified: branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-09-19 15:12:20 UTC (rev 3845) +++ branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-09-19 15:15:56 UTC (rev 3846) @@ -1,27 +1,19 @@ package org.dllearner.algorithm.tbsl.learning; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.net.URL; +import hmm.HiddenMarkovModel; import java.util.ArrayList; import java.util.Collection; -import java.util.Collections; -import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; -import java.util.Iterator; +import java.util.LinkedList; import java.util.List; import java.util.Map; -import java.util.Map.Entry; import java.util.Set; +import java.util.SortedMap; import java.util.SortedSet; import java.util.TreeMap; import java.util.TreeSet; -import java.util.concurrent.Callable; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; +import org.apache.commons.collections15.MultiMap; import org.apache.log4j.Logger; import 
org.dllearner.algorithm.tbsl.nlp.Lemmatizer; import org.dllearner.algorithm.tbsl.nlp.LingPipeLemmatizer; @@ -31,13 +23,7 @@ import org.dllearner.algorithm.tbsl.nlp.WordNet; import org.dllearner.algorithm.tbsl.sparql.Allocation; import org.dllearner.algorithm.tbsl.sparql.Query; -import org.dllearner.algorithm.tbsl.sparql.SPARQL_Filter; -import org.dllearner.algorithm.tbsl.sparql.SPARQL_Pair; -import org.dllearner.algorithm.tbsl.sparql.SPARQL_PairType; -import org.dllearner.algorithm.tbsl.sparql.SPARQL_Property; import org.dllearner.algorithm.tbsl.sparql.SPARQL_QueryType; -import org.dllearner.algorithm.tbsl.sparql.SPARQL_Triple; -import org.dllearner.algorithm.tbsl.sparql.SPARQL_Value; import org.dllearner.algorithm.tbsl.sparql.Slot; import org.dllearner.algorithm.tbsl.sparql.SlotType; import org.dllearner.algorithm.tbsl.sparql.Template; @@ -46,13 +32,10 @@ import org.dllearner.algorithm.tbsl.util.Knowledgebase; import org.dllearner.algorithm.tbsl.util.PopularityMap; import org.dllearner.algorithm.tbsl.util.PopularityMap.EntityType; -import org.dllearner.algorithm.tbsl.util.Similarity; -import org.dllearner.algorithm.tbsl.util.UnknownPropertyHelper.SymPropertyDirection; import org.dllearner.common.index.Index; import org.dllearner.common.index.IndexResultItem; import org.dllearner.common.index.IndexResultSet; import org.dllearner.common.index.MappingBasedIndex; -import org.dllearner.common.index.SOLRIndex; import org.dllearner.common.index.SPARQLDatatypePropertiesIndex; import org.dllearner.common.index.SPARQLIndex; import org.dllearner.common.index.SPARQLObjectPropertiesIndex; @@ -63,19 +46,13 @@ import org.dllearner.core.ComponentInitException; import org.dllearner.core.LearningProblem; import org.dllearner.core.SparqlQueryLearningAlgorithm; -import org.dllearner.core.owl.Description; -import org.dllearner.core.owl.NamedClass; -import org.dllearner.core.owl.ObjectProperty; -import org.dllearner.core.owl.Thing; import 
org.dllearner.kb.LocalModelBasedSparqlEndpointKS; import org.dllearner.kb.SparqlEndpointKS; import org.dllearner.kb.sparql.ExtractionDBCache; import org.dllearner.kb.sparql.SparqlEndpoint; import org.dllearner.kb.sparql.SparqlQuery; import org.dllearner.reasoning.SPARQLReasoner; -import org.ini4j.InvalidFileFormatException; import org.ini4j.Options; - import com.hp.hpl.jena.ontology.OntModelSpec; import com.hp.hpl.jena.query.QueryExecutionFactory; import com.hp.hpl.jena.query.QueryFactory; @@ -85,10 +62,6 @@ import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.ModelFactory; import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP; -import com.hp.hpl.jena.sparql.expr.ExprAggregator; -import com.hp.hpl.jena.sparql.expr.ExprVar; -import com.hp.hpl.jena.sparql.expr.aggregate.AggCount; -import com.hp.hpl.jena.sparql.expr.aggregate.Aggregator; import com.jamonapi.Monitor; import com.jamonapi.MonitorFactory; @@ -516,358 +489,88 @@ return relevantKeywords; } - private SortedSet<WeightedQuery> getWeightedSPARQLQueries(Set<Template> templates){ - logger.debug("Generating SPARQL query candidates..."); - - Map<Slot, Set<Allocation>> slot2Allocations = new TreeMap<Slot, Set<Allocation>>(new Comparator<Slot>() { - - @Override - public int compare(Slot o1, Slot o2) { - if(o1.getSlotType() == o2.getSlotType()){ - return o1.getToken().compareTo(o2.getToken()); - } else { - return -1; + // just for testing the HMM integration, getWeightedSPARQLQueriesOld is the original one + private SortedSet<WeightedQuery> getWeightedSPARQLQueries(Set<Template> templates) + { + // for testing + for(Template template: templates) + { + { + ArrayList<String> keywords = new ArrayList<String>(); + for(Slot slot: template.getSlots()) + { + keywords.add(slot.getWords().get(0)); } + if(template.getSlots().size()!=3) {continue;} + if(!keywords.contains("Mean Hamster Software")) {continue;} + if(!keywords.contains("published")) {continue;} + System.out.println("\"keywords\": 
"+keywords); } - }); - slot2Allocations = Collections.synchronizedMap(new HashMap<Slot, Set<Allocation>>()); - - - SortedSet<WeightedQuery> allQueries = new TreeSet<WeightedQuery>(); - - Set<Allocation> allocations; - - for(Template t : templates){ - logger.info("Processing template:\n" + t.toString()); - allocations = new TreeSet<Allocation>(); - boolean containsRegex = t.getQuery().toString().toLowerCase().contains("(regex("); - - ExecutorService executor = Executors.newFixedThreadPool(t.getSlots().size()); - List<Future<Map<Slot, SortedSet<Allocation>>>> list = new ArrayList<Future<Map<Slot, SortedSet<Allocation>>>>(); - - long startTime = System.currentTimeMillis(); - - for (Slot slot : t.getSlots()) { - if(!slot2Allocations.containsKey(slot)){//System.out.println(slot + ": " + slot.hashCode());System.out.println(slot2Allocations); - Callable<Map<Slot, SortedSet<Allocation>>> worker = new SlotProcessor(slot); - Future<Map<Slot, SortedSet<Allocation>>> submit = executor.submit(worker); - list.add(submit); - } - } - - for (Future<Map<Slot, SortedSet<Allocation>>> future : list) { - try { - Map<Slot, SortedSet<Allocation>> result = future.get(); - Entry<Slot, SortedSet<Allocation>> item = result.entrySet().iterator().next(); - slot2Allocations.put(item.getKey(), item.getValue()); - } catch (InterruptedException e) { - e.printStackTrace(); - } catch (ExecutionException e) { - e.printStackTrace(); + System.out.println(template); + SortedSet<WeightedQuery> queries = new TreeSet<WeightedQuery>(); + Query query = template.getQuery(); + double score = 0; + + Map<List<String>,Collection<String>> segmentToURIs = new HashMap<List<String>,Collection<String>>(); + for(Slot slot: template.getSlots()) + { + List<String> segment = new LinkedList<String>(); + segment.add(slot.getWords().get(0)); // TODO: split it up? 
+ + Set<String> uris = new HashSet<String>(); + + for(IndexResultItem item : getIndexResultItems(slot)) + { + uris.add(item.getUri()); } + segmentToURIs.put(segment,uris); } + HiddenMarkovModel hmm = new HiddenMarkovModel(); + hmm.initialization(); + hmm.startMarkovModel(segmentToURIs,true); + MultiMap<Double,List<String>> paths = hmm.getPaths(); - executor.shutdown(); + // System.out.println(hmm.getPaths()); + // die keywords jetzt in sadeehs algorithmus reinwerfen + // da kommen jetzt pfade raus mit unterschiedlichen wahrscheinlichkeiten + // HiddenMarkovModel HMM = new HiddenMarkovModel(); + // HMM.StartMarkovModel(); + // jetzt die variablen aus der query ersetzen mit den kandidaten + // ranked list der pfade, die die observation sequence generieren - - /*for(Slot slot : t.getSlots()){ - allocations = slot2Allocations2.get(slot); - if(allocations == null){ - allocations = computeAllocations(slot, 10); - slot2Allocations2.put(slot, allocations); - } - slot2Allocations.put(slot, allocations); - - //for tests add the property URI with http://dbpedia.org/property/ namespace - //TODO should be replaced by usage of a separate SOLR index - Set<Allocation> tmp = new HashSet<Allocation>(); - if(slot.getSlotType() == SlotType.PROPERTY || slot.getSlotType() == SlotType.SYMPROPERTY){ - for(Allocation a : allocations){ - String uri = "http://dbpedia.org/property/" + a.getUri().substring(a.getUri().lastIndexOf("/")+1); - Allocation newA = new Allocation(uri, a.getSimilarity(), a.getProminence()); - newA.setScore(a.getScore()-0.000001); - tmp.add(newA); + for(Double d : paths.keySet()) + { + for(List<String> path : paths.get(d)) + { + Query q = new Query(query); + // TODO: which variable stands for which resource? 
do it randomly now to check if the replacement works and then correct the order later + System.out.println(q.getVariablesAsStringList()); + System.out.println(); + int i = 0; + for(String var : q.getVariablesAsStringList()) + { + q.replaceVarWithURI(var, path.get(i)); + i++; } - } - allocations.addAll(tmp); - }*/ - logger.debug("Time needed: " + (System.currentTimeMillis() - startTime) + "ms"); + System.out.println(q); - Set<WeightedQuery> queries = new HashSet<WeightedQuery>(); - Query cleanQuery = t.getQuery(); - queries.add(new WeightedQuery(cleanQuery)); - Set<WeightedQuery> tmp = new TreeSet<WeightedQuery>(); - List<Slot> sortedSlots = new ArrayList<Slot>(); - Set<Slot> classSlots = new HashSet<Slot>(); - for(Slot slot : t.getSlots()){ - if(slot.getSlotType() == SlotType.CLASS){ - sortedSlots.add(slot); - classSlots.add(slot); + WeightedQuery wQuery = new WeightedQuery(q, score); + queries.add(wQuery); } } - for(Slot slot : t.getSlots()){ - if(slot.getSlotType() == SlotType.PROPERTY || slot.getSlotType() == SlotType.OBJECTPROPERTY || slot.getSlotType() == SlotType.DATATYPEPROPERTY){ - sortedSlots.add(slot); - } - } - for(Slot slot : t.getSlots()){ - if(!sortedSlots.contains(slot)){ - sortedSlots.add(slot); - } - } - //add for each SYMPROPERTY Slot the reversed query - for(Slot slot : sortedSlots){ - for(WeightedQuery wQ : queries){ - if(slot.getSlotType() == SlotType.SYMPROPERTY || slot.getSlotType() == SlotType.OBJECTPROPERTY){ - Query reversedQuery = new Query(wQ.getQuery()); - reversedQuery.getTriplesWithVar(slot.getAnchor()).iterator().next().reverse(); - tmp.add(new WeightedQuery(reversedQuery)); - } - tmp.add(wQ); - } - queries.clear(); - queries.addAll(tmp); - tmp.clear(); - } + //System.exit(0); + return queries; + // >> SLOTS: + // y0: RESOURCE {Mean Hamster Software} + // p0: OBJECTPROPERTY {published,print} + // p1: CLASS {video games} - for(Slot slot : sortedSlots){ - if(!slot2Allocations.get(slot).isEmpty()){ - for(Allocation a : 
slot2Allocations.get(slot)){ - for(WeightedQuery query : queries){ - Query q = new Query(query.getQuery()); - boolean drop = false; - if(useDomainRangeRestriction){ - if(slot.getSlotType() == SlotType.PROPERTY || slot.getSlotType() == SlotType.SYMPROPERTY){ - for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ - String objectVar = triple.getValue().getName(); - String subjectVar = triple.getVariable().getName(); - // System.out.println(triple); - for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(objectVar)){ - // System.out.println(typeTriple); - if(true){//reasoner.isObjectProperty(a.getUri())){ - Description range = reasoner.getRange(new ObjectProperty(a.getUri())); - // System.out.println(a); - if(range != null){ - Set<Description> allRanges = new HashSet<Description>(); - SortedSet<Description> superClasses; - if(range instanceof NamedClass){ - superClasses = reasoner.getSuperClasses(range); - allRanges.addAll(superClasses); - } else { - for(Description nc : range.getChildren()){ - superClasses = reasoner.getSuperClasses(nc); - allRanges.addAll(superClasses); - } - } - allRanges.add(range); - allRanges.remove(new NamedClass(Thing.instance.getURI())); - - Set<Description> allTypes = new HashSet<Description>(); - String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); - Description type = new NamedClass(typeURI); - superClasses = reasoner.getSuperClasses(type); - allTypes.addAll(superClasses); - allTypes.add(type); - - if(!org.mindswap.pellet.utils.SetUtils.intersects(allRanges, allTypes)){ - drop = true; - } - } - } else { - drop = true; - } - - } - for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(subjectVar)){ - Description domain = reasoner.getDomain(new ObjectProperty(a.getUri())); - // System.out.println(a); - if(domain != null){ - Set<Description> allDomains = new HashSet<Description>(); - SortedSet<Description> superClasses; - if(domain instanceof NamedClass){ - superClasses = 
reasoner.getSuperClasses(domain); - allDomains.addAll(superClasses); - } else { - for(Description nc : domain.getChildren()){ - superClasses = reasoner.getSuperClasses(nc); - allDomains.addAll(superClasses); - } - } - allDomains.add(domain); - allDomains.remove(new NamedClass(Thing.instance.getURI())); - - Set<Description> allTypes = new HashSet<Description>(); - String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); - Description type = new NamedClass(typeURI); - superClasses = reasoner.getSuperClasses(type); - allTypes.addAll(superClasses); - allTypes.add(type); - - if(!org.mindswap.pellet.utils.SetUtils.intersects(allDomains, allTypes)){ - drop = true; - } else { - - } - } - } - } - } - } - - if(!drop){ - if(slot.getSlotType() == SlotType.RESOURCE){//avoid queries where predicate is data property and object resource->add REGEX filter in this case - for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ - SPARQL_Value object = triple.getValue(); - if(object.isVariable() && object.getName().equals(slot.getAnchor())){//only consider triple where SLOT is in object position - SPARQL_Property predicate = triple.getProperty(); - if(!predicate.isVariable()){//only consider triple where predicate is URI - String predicateURI = predicate.getName().replace("<", "").replace(">", ""); - if(isDatatypeProperty(predicateURI)){//if data property - q.addFilter(new SPARQL_Filter(new SPARQL_Pair( - object, "'" + slot.getWords().get(0) + "'", SPARQL_PairType.REGEX))); - } else { - q.replaceVarWithURI(slot.getAnchor(), a.getUri()); - } - } else { - q.replaceVarWithURI(slot.getAnchor(), a.getUri()); - } - } else { - q.replaceVarWithURI(slot.getAnchor(), a.getUri()); - } - } - } else { - q.replaceVarWithURI(slot.getAnchor(), a.getUri()); - } - WeightedQuery w = new WeightedQuery(q); - double newScore = query.getScore() + a.getScore(); - w.setScore(newScore); - w.addAllocations(query.getAllocations()); - w.addAllocation(a); 
- tmp.add(w); - } - - - } - } - //lower queries with FILTER-REGEX - if(containsRegex){ - for(WeightedQuery wQ : tmp){ - wQ.setScore(wQ.getScore() - 0.01); - } - } - - queries.clear(); - queries.addAll(tmp);//System.out.println(tmp); - tmp.clear(); - } else {//Add REGEX FILTER if resource slot is empty and predicate is datatype property - if(slot.getSlotType() == SlotType.RESOURCE){ - for(WeightedQuery query : queries){ - Query q = query.getQuery(); - for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ - SPARQL_Value object = triple.getValue(); - if(object.isVariable() && object.getName().equals(slot.getAnchor())){//only consider triple where SLOT is in object position - SPARQL_Property predicate = triple.getProperty(); - if(!predicate.isVariable()){//only consider triple where predicate is URI - String predicateURI = predicate.getName().replace("<", "").replace(">", ""); - if(isDatatypeProperty(predicateURI)){//if data property - q.addFilter(new SPARQL_Filter(new SPARQL_Pair( - object, "'" + slot.getWords().get(0) + "'", SPARQL_PairType.REGEX))); - } - } - } - } - - } - - } else { - if(slot.getSlotType() == SlotType.SYMPROPERTY){ - for(WeightedQuery wQ : queries){ - List<SPARQL_Triple> triples = wQ.getQuery().getTriplesWithVar(slot.getAnchor()); - for(SPARQL_Triple triple : triples){ - String typeVar; - String resourceURI; - SymPropertyDirection direction; - if(triple.getValue().isVariable()){ - direction = SymPropertyDirection.VAR_RIGHT; - typeVar = triple.getValue().getName(); - resourceURI = triple.getVariable().getName(); - } else { - direction = SymPropertyDirection.VAR_LEFT; - typeVar = triple.getVariable().getName(); - resourceURI = triple.getValue().getName(); - } - resourceURI = resourceURI.replace("<", "").replace(">", ""); - List<SPARQL_Triple> typeTriples = wQ.getQuery().getRDFTypeTriples(typeVar); - for(SPARQL_Triple typeTriple : typeTriples){ - String typeURI = typeTriple.getValue().getName().replace("<", "").replace(">", ""); - // 
List<Entry<String, Integer>> mostFrequentProperties = UnknownPropertyHelper.getMostFrequentProperties(endpoint, cache, typeURI, resourceURI, direction); - // for(Entry<String, Integer> property : mostFrequentProperties){ - // wQ.getQuery().replaceVarWithURI(slot.getAnchor(), property.getKey()); - // wQ.setScore(wQ.getScore() + 0.1); - // } - } - - } - } - } - } - // else if(slot.getSlotType() == SlotType.CLASS){ - // String token = slot.getWords().get(0); - // if(slot.getToken().contains("house")){ - // String regexToken = token.replace("houses", "").replace("house", "").trim(); - // try { - // Map<Slot, SortedSet<Allocation>> ret = new SlotProcessor(new Slot(null, SlotType.CLASS, Collections.singletonList("house"))).call(); - // SortedSet<Allocation> alloc = ret.entrySet().iterator().next().getValue(); - // if(alloc != null && !alloc.isEmpty()){ - // String uri = alloc.first().getUri(); - // for(WeightedQuery query : queries){ - // Query q = query.getQuery(); - // for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ - // SPARQL_Term subject = triple.getVariable(); - // SPARQL_Term object = new SPARQL_Term("desc"); - // object.setIsVariable(true); - // object.setIsURI(false); - // q.addCondition(new SPARQL_Triple(subject, new SPARQL_Property("<http://purl.org/goodrelations/v1#description>"), object)); - // q.addFilter(new SPARQL_Filter(new SPARQL_Pair( - // object, "'" + regexToken + "'", SPARQL_PairType.REGEX))); - // } - // q.replaceVarWithURI(slot.getAnchor(), uri); - // - // } - // } - // } catch (Exception e) { - // e.printStackTrace(); - // } - // } - // } - - - } - - } - for (Iterator<WeightedQuery> iterator = queries.iterator(); iterator.hasNext();) { - WeightedQuery wQ = iterator.next(); - if(dropZeroScoredQueries){ - if(wQ.getScore() <= 0){ - iterator.remove(); - } - } else { - if(t.getSlots().size()==0) throw new AssertionError("no slots for query "+wQ); - wQ.setScore(wQ.getScore()/t.getSlots().size()); - } - - } - 
allQueries.addAll(queries); - List<Query> qList = new ArrayList<Query>(); - for(WeightedQuery wQ : queries){//System.err.println(wQ.getQuery()); - qList.add(wQ.getQuery()); - } - template2Queries.put(t, qList); + // System.out.println(template); } - logger.debug("...done in "); - return allQueries; + // + return null; } private double getProminenceValue(String uri, SlotType type){ @@ -1142,101 +845,51 @@ } - class SlotProcessor implements Callable<Map<Slot, SortedSet<Allocation>>>{ - - private Slot slot; - - public SlotProcessor(Slot slot) { - this.slot = slot; - } - - @Override - public Map<Slot, SortedSet<Allocation>> call() throws Exception { - Map<Slot, SortedSet<Allocation>> result = new HashMap<Slot, SortedSet<Allocation>>(); - result.put(slot, computeAllocations(slot)); - return result; - } - - private SortedSet<Allocation> computeAllocations(Slot slot){ - logger.debug("Computing allocations for slot: " + slot); - SortedSet<Allocation> allocations = new TreeSet<Allocation>(); - - Index index = getIndexBySlotType(slot); - - IndexResultSet rs; - for(String word : slot.getWords()){ - rs = new IndexResultSet(); - if(mappingIndex != null){ - SlotType type = slot.getSlotType(); - if(type == SlotType.CLASS){ - rs.add(mappingIndex.getClassesWithScores(word)); - } else if(type == SlotType.PROPERTY || type == SlotType.SYMPROPERTY){ - rs.add(mappingIndex.getPropertiesWithScores(word)); - } else if(type == SlotType.DATATYPEPROPERTY){ - rs.add(mappingIndex.getDatatypePropertiesWithScores(word)); - } else if(type == SlotType.OBJECTPROPERTY){ - rs.add(mappingIndex.getObjectPropertiesWithScores(word)); - } else if(type == SlotType.RESOURCE || type == SlotType.UNSPEC){ - rs.add(mappingIndex.getResourcesWithScores(word)); - } + private Set<IndexResultItem> getIndexResultItems(Slot slot) + { +// List<String> uris = new LinkedList<String>(); + Set<IndexResultItem> indexResultItems = new HashSet<IndexResultItem>(); + + Index index = getIndexBySlotType(slot); + + for(String word 
: slot.getWords()) + { + IndexResultSet rs = new IndexResultSet(); + if(mappingIndex != null){ + SlotType type = slot.getSlotType(); + if(type == SlotType.CLASS){ + rs.add(mappingIndex.getClassesWithScores(word)); + } else if(type == SlotType.PROPERTY || type == SlotType.SYMPROPERTY){ + rs.add(mappingIndex.getPropertiesWithScores(word)); + } else if(type == SlotType.DATATYPEPROPERTY){ + rs.add(mappingIndex.getDatatypePropertiesWithScores(word)); + } else if(type == SlotType.OBJECTPROPERTY){ + rs.add(mappingIndex.getObjectPropertiesWithScores(word)); + } else if(type == SlotType.RESOURCE || type == SlotType.UNSPEC){ + rs.add(mappingIndex.getResourcesWithScores(word)); } - //use the non manual indexes only if mapping based resultset is not empty and option is set - if(!useManualMappingsIfExistOnly || rs.isEmpty()){ - if(slot.getSlotType() == SlotType.RESOURCE){ - rs.add(index.getResourcesWithScores(word, 20)); - } else { - if(slot.getSlotType() == SlotType.CLASS){ - word = PlingStemmer.stem(word); - } - rs.add(index.getResourcesWithScores(word, 20)); + } + //use the non manual indexes only if mapping based resultset is not empty and option is set + if(!useManualMappingsIfExistOnly || rs.isEmpty()){ + if(slot.getSlotType() == SlotType.RESOURCE){ + rs.add(index.getResourcesWithScores(word, 20)); + } else { + if(slot.getSlotType() == SlotType.CLASS){ + word = PlingStemmer.stem(word); } + rs.add(index.getResourcesWithScores(word, 20)); } - - - for(IndexResultItem item : rs.getItems()){ - double similarity = Similarity.getSimilarity(word, item.getLabel()); - // //get the labels of the redirects and compute the highest similarity - // if(slot.getSlotType() == SlotType.RESOURCE){ - // Set<String> labels = getRedirectLabels(item.getUri()); - // for(String label : labels){ - // double tmp = Similarity.getSimilarity(word, label); - // if(tmp > similarity){ - // similarity = tmp; - // } - // } - // } - double prominence = getProminenceValue(item.getUri(), slot.getSlotType()); - 
allocations.add(new Allocation(item.getUri(), prominence, similarity)); - } - } - - normProminenceValues(allocations); - - computeScore(allocations); - logger.debug("Found " + allocations.size() + " allocations for slot " + slot); - return new TreeSet<Allocation>(allocations); +// for(IndexResultItem item: rs.getItems()) +// { +// uris.add(item.getUri()); +// } + indexResultItems.addAll(rs.getItems()); } - - private Index getIndexBySlotType(Slot slot){ - Index index = null; - SlotType type = slot.getSlotType(); - if(type == SlotType.CLASS){ - index = classesIndex; - } else if(type == SlotType.PROPERTY || type == SlotType.SYMPROPERTY){ - index = propertiesIndex; - } else if(type == SlotType.DATATYPEPROPERTY){ - index = datatypePropertiesIndex; - } else if(type == SlotType.OBJECTPROPERTY){ - index = objectPropertiesIndex; - } else if(type == SlotType.RESOURCE || type == SlotType.UNSPEC){ - index = resourcesIndex; - } - return index; - } - + return indexResultItems; } + public String getTaggedInput() { if(templateGenerator==null) {throw new AssertionError("Learner not initialized. 
Please call init();");} @@ -1255,33 +908,33 @@ return isDatatypeProperty; } - /** - * @param args - * @throws NoTemplateFoundException - * @throws IOException - * @throws FileNotFoundException - * @throws InvalidFileFormatException - */ - public static void main(String[] args) throws Exception { - SparqlEndpoint endpoint = new SparqlEndpoint(new URL("http://greententacle.techfak.uni-bielefeld.de:5171/sparql"), - Collections.<String>singletonList(""), Collections.<String>emptyList()); - Index resourcesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_resources"); - Index classesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_classes"); - Index propertiesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_properties"); + // /** + // * @param args + // * @throws NoTemplateFoundException + // * @throws IOException + // * @throws FileNotFoundException + // * @throws InvalidFileFormatException + // */ + // public static void main(String[] args) throws Exception { + // SparqlEndpoint endpoint = new SparqlEndpoint(new URL("http://greententacle.techfak.uni-bielefeld.de:5171/sparql"), + // Collections.<String>singletonList(""), Collections.<String>emptyList()); + // Index resourcesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_resources"); + // Index classesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_classes"); + // Index propertiesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_properties"); + // + // SPARQLTemplateBasedLearner2 learner = new SPARQLTemplateBasedLearner2(endpoint, resourcesIndex, classesIndex, propertiesIndex); + // learner.init(); + // + // String question = "What is the highest mountain?"; + // + // learner.setQuestion(question); + // learner.learnSPARQLQueries(); + // System.out.println("Learned query:\n" + learner.getBestSPARQLQuery()); + // System.out.println("Lexical answer type is: " + learner.getTemplates().iterator().next().getLexicalAnswerType()); + // 
System.out.println(learner.getLearnedPosition()); + // + // } - SPARQLTemplateBasedLearner2 learner = new SPARQLTemplateBasedLearner2(endpoint, resourcesIndex, classesIndex, propertiesIndex); - learner.init(); - String question = "What is the highest mountain?"; - learner.setQuestion(question); - learner.learnSPARQLQueries(); - System.out.println("Learned query:\n" + learner.getBestSPARQLQuery()); - System.out.println("Lexical answer type is: " + learner.getTemplates().iterator().next().getLexicalAnswerType()); - System.out.println(learner.getLearnedPosition()); - - } - - - } Modified: branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java =================================================================== --- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java 2012-09-19 15:12:20 UTC (rev 3845) +++ branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java 2012-09-19 15:15:56 UTC (rev 3846) @@ -58,7 +58,7 @@ unions = new HashSet<SPARQL_Union>(); } - //copy constructor + /** copy constructor*/ public Query(Query query){ this.qt = query.getQt(); Set<SPARQL_Term> selTerms = new HashSet<SPARQL_Term>(); Modified: branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java =================================================================== --- branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-09-19 15:12:20 UTC (rev 3845) +++ branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-09-19 15:15:56 UTC (rev 3846) @@ -114,7 +114,7 @@ test("QALD 2 Benchmark ideally tagged", file,SparqlEndpoint.getEndpointDBpedia(),dbpediaLiveCache,dbpediaLiveKnowledgebase,null,null); } - @Test public void testOxford() throws Exception + /*@Test*/ public void testOxford() throws Exception { File file = 
new File(getClass().getClassLoader().getResource("tbsl/evaluation/oxford_working_questions.xml").getFile()); test("Oxford 19 working questions", file,null,null,null,loadOxfordModel(),getOxfordMappingIndex()); @@ -160,6 +160,26 @@ logger.info("learned query: "+testData.id2Query.get(0)); } + /** For debugging one question in particular. + */ + @Test public void testSingleQueryDBpedia() + { +// Logger.getLogger(Templator.class).setLevel(Level.DEBUG); +// Logger.getLogger(Parser.class).setLevel(Level.DEBUG); +// Logger.getLogger(SPARQLTemplateBasedLearner2.class).setLevel(Level.DEBUG); + // String question = "houses for less than 900000 pounds"; + String question = "Give/VB me/PRP all/DT video/JJ games/NNS published/VBN by/IN Mean/NNP Hamster/NNP Software/NNP"; +// String question = "give me all video games published by mean hamster software"; +// String question = "Give me all video games published by Mean Hamster Software"; +// question = new StanfordPartOfSpeechTagger().tag(question); +// System.out.println(question); + +// Model model = loadOxfordModel(); + QueryTestData testData = new QueryTestData(); + new LearnQueryCallable(question, 0, testData, dbpediaLiveKnowledgebase, true).call(); + logger.info("learned query: "+testData.id2Query.get(0)); + } + /*@Test*/ public void generateXMLOxford() throws IOException { boolean ADD_POS_TAGS = true; @@ -935,6 +955,7 @@ learner = new SPARQLTemplateBasedLearner2(knowledgeBase,pretagged?null:POSTaggerHolder.posTagger,wordnet,options); try {learner.init();} catch (ComponentInitException e) {throw new RuntimeException(e);} learner.setUseIdealTagger(pretagged); + learner.setGrammarFiles(new String[]{"tbsl/lexicon/english.lex"}); } public LearnQueryCallable(String question, int id, QueryTestData testData, Model model,MappingBasedIndex index,boolean pretagged) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ki...@us...> - 2012-09-21 14:12:42
|
Revision: 3848 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3848&view=rev Author: kirdie Date: 2012-09-21 14:12:34 +0000 (Fri, 21 Sep 2012) Log Message: ----------- more work on the hmm. Modified Paths: -------------- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java branches/hmm/components-ext/src/main/java/org/dllearner/common/index/HierarchicalIndex.java branches/hmm/components-ext/src/main/java/org/dllearner/common/index/Index.java branches/hmm/components-ext/src/main/java/org/dllearner/common/index/IndexResultItem.java branches/hmm/components-ext/src/main/java/org/dllearner/common/index/SOLRIndex.java branches/hmm/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java Modified: branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-09-20 15:44:22 UTC (rev 3847) +++ branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-09-21 14:12:34 UTC (rev 3848) @@ -5,6 +5,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedList; @@ -34,6 +35,7 @@ import org.dllearner.algorithm.tbsl.util.Knowledgebase; import org.dllearner.algorithm.tbsl.util.PopularityMap; import org.dllearner.algorithm.tbsl.util.PopularityMap.EntityType; +import org.dllearner.algorithm.tbsl.util.Similarity; import org.dllearner.common.index.Index; import 
org.dllearner.common.index.IndexResultItem; import org.dllearner.common.index.IndexResultSet; @@ -55,6 +57,8 @@ import org.dllearner.kb.sparql.SparqlQuery; import org.dllearner.reasoning.SPARQLReasoner; import org.ini4j.Options; +import org.semanticweb.owlapi.model.IRI; +import org.semanticweb.owlapi.util.SimpleIRIShortFormProvider; import com.hp.hpl.jena.ontology.OntModelSpec; import com.hp.hpl.jena.query.QueryExecutionFactory; import com.hp.hpl.jena.query.QueryFactory; @@ -74,8 +78,11 @@ } private Mode mode = Mode.BEST_QUERY; + private static SimpleIRIShortFormProvider sfp = new SimpleIRIShortFormProvider(); private static final Logger logger = Logger.getLogger(SPARQLTemplateBasedLearner2.class); + private static final boolean CREATE_SYNONYMS = false; + private static final Double BOA_THRESHOLD = 0.9; private Monitor templateMon = MonitorFactory.getTimeMonitor("template"); private Monitor sparqlMon = MonitorFactory.getTimeMonitor("sparql"); @@ -371,7 +378,7 @@ logger.debug("Generating SPARQL query templates..."); templateMon.start(); if(multiThreaded){ - templates = templateGenerator.buildTemplatesMultiThreaded(question); + templates = templateGenerator.buildTemplatesMultiThreaded(question,CREATE_SYNONYMS); } else { templates = templateGenerator.buildTemplates(question); } @@ -512,8 +519,9 @@ SortedSet<WeightedQuery> queries = new TreeSet<WeightedQuery>(); Query query = template.getQuery(); double score = 0; - + Map<List<String>,List<ResourceInfo>> segmentToURIs = new HashMap<List<String>,List<ResourceInfo>>(); + Map<String,IndexResultItem> uriUniqueToResultItem = new HashMap<String,IndexResultItem>(); for(Slot slot: template.getSlots()) { List<String> segment = new LinkedList<String>(); @@ -525,9 +533,19 @@ // if this gets used at another place, create a function IndexResultItemToResourceInfo() ResourceInfo info = new ResourceInfo(); info.setUri(item.getUri()); - info.setLabel(item.getLabel()); + String label = item.getLabel(); + // in dbpedia, the last part of 
the uri is transformed from the english label, reverse the transformation (should almost always work for dbpedia article resources) + info.setLabel(label!=null?label:sfp.getShortForm(IRI.create(item.getUri()))); + // in saedeehs algorithm, the emission probabilty is formed by the string similarity + // but we use the lucene index score + double max = 0; + for(String word: slot.getWords()) {max = Math.max(max, Similarity.getSimilarity(word, info.getLabel()));} + if(max<0||max>1) throw new AssertionError("max is not in [0,1], max="+max); + info.setStringSimilarityScore(max); + + resourceInfos.add(info); } - segmentToURIs.put(segment,resources); + segmentToURIs.put(segment,resourceInfos); } HiddenMarkovModel hmm = new HiddenMarkovModel(); hmm.initialization(); @@ -851,11 +869,11 @@ private Set<IndexResultItem> getIndexResultItems(Slot slot) { -// List<String> uris = new LinkedList<String>(); + // List<String> uris = new LinkedList<String>(); Set<IndexResultItem> indexResultItems = new HashSet<IndexResultItem>(); - + Index index = getIndexBySlotType(slot); - + for(String word : slot.getWords()) { IndexResultSet rs = new IndexResultSet(); @@ -876,18 +894,23 @@ //use the non manual indexes only if mapping based resultset is not empty and option is set if(!useManualMappingsIfExistOnly || rs.isEmpty()){ if(slot.getSlotType() == SlotType.RESOURCE){ - rs.add(index.getResourcesWithScores(word, 20)); + rs.add(index.getResourcesWithScores(word, 20,0)); } else { if(slot.getSlotType() == SlotType.CLASS){ word = PlingStemmer.stem(word); } - rs.add(index.getResourcesWithScores(word, 20)); + IndexResultSet tmp = index.getResourcesWithScores(word, 20,0,Collections.singleton("boa-score")); + for(IndexResultItem item : tmp.getItems()) + {System.out.println(item); + Double boaScore = (Double) item.getFields().get("boa-score"); + if(boaScore==null||boaScore>BOA_THRESHOLD) rs.addItem(item); + } } } -// for(IndexResultItem item: rs.getItems()) -// { -// uris.add(item.getUri()); -// } + // 
for(IndexResultItem item: rs.getItems()) + // { + // uris.add(item.getUri()); + // } indexResultItems.addAll(rs.getItems()); } return indexResultItems; Modified: branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java =================================================================== --- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2012-09-20 15:44:22 UTC (rev 3847) +++ branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2012-09-21 14:12:34 UTC (rev 3848) @@ -31,111 +31,111 @@ import org.dllearner.algorithm.tbsl.sparql.Template; public class Templator { - + private static final Logger logger = Logger.getLogger(Templator.class); - - String[] GRAMMAR_FILES = {"tbsl/lexicon/english.lex","tbsl/lexicon/english_oxford.lex"}; - + + String[] GRAMMAR_FILES = {"tbsl/lexicon/english.lex","tbsl/lexicon/english_oxford.lex"}; + private String[] noun = {"NN","NNS","NNP","NNPS","NPREP","JJNN","JJNPREP"}; private String[] adjective = {"JJ","JJR","JJS","JJH"}; private String[] verb = {"VB","VBD","VBG","VBN","VBP","VBZ","PASSIVE","PASSPART","VPASS","VPASSIN","GERUNDIN","VPREP","WHEN","WHERE"}; - + PartOfSpeechTagger tagger; LTAGLexicon g; LTAG_Lexicon_Constructor LTAG_Constructor = new LTAG_Lexicon_Constructor(); Parser parser; Preprocessor pp; - + WordNet wordnet; LingPipeLemmatizer lem = new LingPipeLemmatizer(); - - DUDE2UDRS_Converter d2u = new DUDE2UDRS_Converter(); - DRS2SPARQL_Converter d2s = new DRS2SPARQL_Converter(); - + + DUDE2UDRS_Converter d2u = new DUDE2UDRS_Converter(); + DRS2SPARQL_Converter d2s = new DRS2SPARQL_Converter(); + boolean ONE_SCOPE_ONLY = true; boolean UNTAGGED_INPUT = true; boolean USE_NER = false; boolean USE_WORDNET = true; boolean VERBOSE = true; - + private String taggedInput; - + private Set<Template> templates; private Set<DRS> drses; - + public Templator() { this(new StanfordPartOfSpeechTagger(), new WordNet()); } 
- + public Templator(final PartOfSpeechTagger tagger) { this(tagger, new WordNet()); } - + public Templator(final PartOfSpeechTagger tagger, WordNet wordnet) { - this.tagger = tagger; - this.wordnet = wordnet; - - List<InputStream> grammarFiles = new ArrayList<InputStream>(); - for(int i = 0; i < GRAMMAR_FILES.length; i++){ - grammarFiles.add(this.getClass().getClassLoader().getResourceAsStream(GRAMMAR_FILES[i])); - } - - g = LTAG_Constructor.construct(grammarFiles); - - parser = new Parser(); - parser.SHOW_GRAMMAR = true; - parser.USE_DPS_AS_INITTREES = true; - parser.CONSTRUCT_SEMANTICS = true; - parser.MODE = "LEIPZIG"; - - pp = new Preprocessor(USE_NER); + this.tagger = tagger; + this.wordnet = wordnet; + + List<InputStream> grammarFiles = new ArrayList<InputStream>(); + for(int i = 0; i < GRAMMAR_FILES.length; i++){ + grammarFiles.add(this.getClass().getClassLoader().getResourceAsStream(GRAMMAR_FILES[i])); + } + + g = LTAG_Constructor.construct(grammarFiles); + + parser = new Parser(); + parser.SHOW_GRAMMAR = true; + parser.USE_DPS_AS_INITTREES = true; + parser.CONSTRUCT_SEMANTICS = true; + parser.MODE = "LEIPZIG"; + + pp = new Preprocessor(USE_NER); } - + public Templator(final PartOfSpeechTagger tagger, WordNet wordnet, String[] GRAMMAR_FILES) { - this.tagger = tagger; - this.wordnet = wordnet; - this.GRAMMAR_FILES = GRAMMAR_FILES; + this.tagger = tagger; + this.wordnet = wordnet; + this.GRAMMAR_FILES = GRAMMAR_FILES; - List<InputStream> grammarFiles = new ArrayList<InputStream>(); - for(int i = 0; i < GRAMMAR_FILES.length; i++) { - grammarFiles.add(this.getClass().getClassLoader().getResourceAsStream(GRAMMAR_FILES[i])); - } + List<InputStream> grammarFiles = new ArrayList<InputStream>(); + for(int i = 0; i < GRAMMAR_FILES.length; i++) { + grammarFiles.add(this.getClass().getClassLoader().getResourceAsStream(GRAMMAR_FILES[i])); + } - g = LTAG_Constructor.construct(grammarFiles); + g = LTAG_Constructor.construct(grammarFiles); - parser = new Parser(); - 
parser.SHOW_GRAMMAR = true; - parser.USE_DPS_AS_INITTREES = true; - parser.CONSTRUCT_SEMANTICS = true; - parser.MODE = "LEIPZIG"; + parser = new Parser(); + parser.SHOW_GRAMMAR = true; + parser.USE_DPS_AS_INITTREES = true; + parser.CONSTRUCT_SEMANTICS = true; + parser.MODE = "LEIPZIG"; - pp = new Preprocessor(USE_NER); -} - + pp = new Preprocessor(USE_NER); + } + public Templator(boolean b) { - this.tagger = new StanfordPartOfSpeechTagger(); - this.USE_WORDNET = false; - VERBOSE = b; - - List<InputStream> grammarFiles = new ArrayList<InputStream>(); - for(int i = 0; i < GRAMMAR_FILES.length; i++){ - grammarFiles.add(this.getClass().getClassLoader().getResourceAsStream(GRAMMAR_FILES[i])); - } - - g = LTAG_Constructor.construct(grammarFiles); - - parser = new Parser(); - parser.SHOW_GRAMMAR = false; - parser.VERBOSE = b; - parser.USE_DPS_AS_INITTREES = true; - parser.CONSTRUCT_SEMANTICS = true; - parser.MODE = "LEIPZIG"; - - pp = new Preprocessor(USE_NER); - pp.setVERBOSE(b); + this.tagger = new StanfordPartOfSpeechTagger(); + this.USE_WORDNET = false; + VERBOSE = b; + + List<InputStream> grammarFiles = new ArrayList<InputStream>(); + for(int i = 0; i < GRAMMAR_FILES.length; i++){ + grammarFiles.add(this.getClass().getClassLoader().getResourceAsStream(GRAMMAR_FILES[i])); + } + + g = LTAG_Constructor.construct(grammarFiles); + + parser = new Parser(); + parser.SHOW_GRAMMAR = false; + parser.VERBOSE = b; + parser.USE_DPS_AS_INITTREES = true; + parser.CONSTRUCT_SEMANTICS = true; + parser.MODE = "LEIPZIG"; + + pp = new Preprocessor(USE_NER); + pp.setVERBOSE(b); } - + public void setUNTAGGED_INPUT(boolean b) { UNTAGGED_INPUT = b; } @@ -146,20 +146,21 @@ VERBOSE = b; } public void setGrammarFiles(String[] gf) { - GRAMMAR_FILES = gf; - List<InputStream> grammarFiles = new ArrayList<InputStream>(); - for(int i = 0; i < GRAMMAR_FILES.length; i++){ - grammarFiles.add(this.getClass().getClassLoader().getResourceAsStream(GRAMMAR_FILES[i])); - } - g = 
LTAG_Constructor.construct(grammarFiles); + GRAMMAR_FILES = gf; + List<InputStream> grammarFiles = new ArrayList<InputStream>(); + for(int i = 0; i < GRAMMAR_FILES.length; i++){ + grammarFiles.add(this.getClass().getClassLoader().getResourceAsStream(GRAMMAR_FILES[i])); + } + g = LTAG_Constructor.construct(grammarFiles); } - public Set<Template> buildTemplates(String s) { - - d2s.setInputString(s); - + public Set<Template> buildTemplates(String s) {return buildTemplates(s,true);} + public Set<Template> buildTemplates(String s, boolean createSynonyms) { + + d2s.setInputString(s); + boolean clearAgain = true; - + String tagged; if (UNTAGGED_INPUT) { s = pp.normalize(s); @@ -176,135 +177,140 @@ newtagged = pp.condenseNominals(pp.findNEs(tagged,s)); } else newtagged = pp.condenseNominals(tagged); - + newtagged = pp.condense(newtagged); logger.debug("Preprocessed: " + newtagged); - - parser.parse(newtagged,g); - - if (parser.getDerivationTrees().isEmpty()) { - parser.clear(g,parser.getTemps()); - clearAgain = false; - if (VERBOSE) logger.error("[Templator.java] '" + s + "' could not be parsed."); - } - else { - try { - parser.buildDerivedTrees(g); - } catch (ParseException e) { - if (VERBOSE) logger.error("[Templator.java] ParseException at '" + e.getMessage() + "'", e); - } - } - // build pairs <String,POStag> from tagged - Hashtable<String,String> postable = new Hashtable<String,String>(); - for (String st : newtagged.split(" ")) { + parser.parse(newtagged,g); + + if (parser.getDerivationTrees().isEmpty()) { + parser.clear(g,parser.getTemps()); + clearAgain = false; + if (VERBOSE) logger.error("[Templator.java] '" + s + "' could not be parsed."); + } + else { + try { + parser.buildDerivedTrees(g); + } catch (ParseException e) { + if (VERBOSE) logger.error("[Templator.java] ParseException at '" + e.getMessage() + "'", e); + } + } + + // build pairs <String,POStag> from tagged + Hashtable<String,String> postable = new Hashtable<String,String>(); + for (String st : 
newtagged.split(" ")) { postable.put(st.substring(0,st.indexOf("/")).toLowerCase(),st.substring(st.indexOf("/")+1));; } - // - - Set<DRS> drses = new HashSet<DRS>(); - Set<Template> templates = new HashSet<Template>(); - - for (Dude dude : parser.getDudes()) { - UDRS udrs = d2u.convert(dude); - if (udrs != null) { - - for (DRS drs : udrs.initResolve()) { - - List<Slot> slots = new ArrayList<Slot>(); - slots.addAll(dude.getSlots()); - d2s.setSlots(slots); - d2s.redundantEqualRenaming(drs); - - if (!containsModuloRenaming(drses,drs)) { -// // DEBUG - if (VERBOSE) { - logger.debug(">>> DUDE:\n" + dude.toString()); - logger.debug("\n>>> DRS:\n"+ drs.toString()); - for (Slot sl : slots) { - logger.debug(sl.toString()); - } - } -// // - drses.add(drs); - - try { - Template temp = d2s.convert(drs,slots); - if (temp == null) { continue; } - temp = temp.checkandrefine(); - if (temp == null) { continue; } - - if (USE_WORDNET) { // find WordNet synonyms - List<String> newwords; - String word; - String pos; - for (Slot slot : temp.getSlots()) { - if (!slot.getWords().isEmpty()) { - - word = slot.getWords().get(0); - pos = postable.get(word.toLowerCase().replace(" ","_")); - - POS wordnetpos = null; - if (pos != null) { - if (equalsOneOf(pos,noun)) { - wordnetpos = POS.NOUN; - } - else if (equalsOneOf(pos,adjective)) { - wordnetpos = POS.ADJECTIVE; - } - else if (equalsOneOf(pos,verb)) { - wordnetpos = POS.VERB; - } - } - - List<String> strings = new ArrayList<String>(); - if (wordnetpos != null && wordnetpos.equals(POS.ADJECTIVE)) { - strings = wordnet.getAttributes(word); - } - - newwords = new ArrayList<String>(); - newwords.addAll(slot.getWords()); - newwords.addAll(strings); - - if (wordnetpos != null && !slot.getSlotType().equals(SlotType.RESOURCE)) { - newwords.addAll(wordnet.getBestSynonyms(wordnetpos,getLemmatizedWord(word))); - for (String att : getLemmatizedWords(strings)) { - newwords.addAll(wordnet.getBestSynonyms(wordnetpos,att)); - } - } - if (newwords.isEmpty()) 
{ - newwords.add(slot.getWords().get(0)); - } - List<String> newwordslist = new ArrayList<String>(); - newwordslist.addAll(newwords); - slot.setWords(newwordslist); - } - } - } - // - - templates.add(temp); - } catch (java.lang.ClassCastException e) { - continue; - } - if (ONE_SCOPE_ONLY) { break; } - } - } - } - } - - if (clearAgain) { - parser.clear(g,parser.getTemps()); - } -// System.gc(); - - return templates; - } - - public Set<Template> buildTemplatesMultiThreaded(String s) { - + // + + Set<DRS> drses = new HashSet<DRS>(); + Set<Template> templates = new HashSet<Template>(); + + for (Dude dude : parser.getDudes()) { + UDRS udrs = d2u.convert(dude); + if (udrs != null) { + + for (DRS drs : udrs.initResolve()) { + + List<Slot> slots = new ArrayList<Slot>(); + slots.addAll(dude.getSlots()); + d2s.setSlots(slots); + d2s.redundantEqualRenaming(drs); + + if (!containsModuloRenaming(drses,drs)) { + // // DEBUG + if (VERBOSE) { + logger.debug(">>> DUDE:\n" + dude.toString()); + logger.debug("\n>>> DRS:\n"+ drs.toString()); + for (Slot sl : slots) { + logger.debug(sl.toString()); + } + } + // // + drses.add(drs); + + try { + Template temp = d2s.convert(drs,slots); + if (temp == null) { continue; } + temp = temp.checkandrefine(); + if (temp == null) { continue; } + + if (USE_WORDNET) { // find WordNet synonyms + List<String> newwords; + String word; + String pos; + for (Slot slot : temp.getSlots()) { + if (!slot.getWords().isEmpty()) { + + word = slot.getWords().get(0); + pos = postable.get(word.toLowerCase().replace(" ","_")); + + POS wordnetpos = null; + if (pos != null) { + if (equalsOneOf(pos,noun)) { + wordnetpos = POS.NOUN; + } + else if (equalsOneOf(pos,adjective)) { + wordnetpos = POS.ADJECTIVE; + } + else if (equalsOneOf(pos,verb)) { + wordnetpos = POS.VERB; + } + } + + List<String> strings = new ArrayList<String>(); + if(createSynonyms) + { + if (wordnetpos != null && wordnetpos.equals(POS.ADJECTIVE)) {strings = wordnet.getAttributes(word);} + } + + newwords 
= new ArrayList<String>(); + newwords.addAll(slot.getWords()); + newwords.addAll(strings); + + if(createSynonyms) + { + if (wordnetpos != null && !slot.getSlotType().equals(SlotType.RESOURCE)) { + newwords.addAll(wordnet.getBestSynonyms(wordnetpos,getLemmatizedWord(word))); + for (String att : getLemmatizedWords(strings)) { + newwords.addAll(wordnet.getBestSynonyms(wordnetpos,att)); + } + } + } + if (newwords.isEmpty()) { + newwords.add(slot.getWords().get(0)); + } + List<String> newwordslist = new ArrayList<String>(); + newwordslist.addAll(newwords); + slot.setWords(newwordslist); + } + } + } + // + + templates.add(temp); + } catch (java.lang.ClassCastException e) { + continue; + } + if (ONE_SCOPE_ONLY) { break; } + } + } + } + } + + if (clearAgain) { + parser.clear(g,parser.getTemps()); + } + // System.gc(); + + return templates; + } + + public Set<Template> buildTemplatesMultiThreaded(String s) {return buildTemplates(s,true);} + public Set<Template> buildTemplatesMultiThreaded(String s,boolean createSynonyms) { + boolean clearAgain = true; - + String tagged; if (UNTAGGED_INPUT) { s = pp.normalize(s); @@ -321,148 +327,153 @@ newtagged = pp.condenseNominals(pp.findNEs(tagged,s)); } else newtagged = pp.condenseNominals(tagged); - + newtagged = pp.condense(newtagged); logger.debug("Preprocessed: " + newtagged); - - parser.parseMultiThreaded(newtagged,g); - - if (parser.getDerivationTrees().isEmpty()) { - parser.clear(g,parser.getTemps()); - clearAgain = false; - logger.error("[Templator.java] '" + s + "' could not be parsed."); - } - else { - try { - parser.buildDerivedTreesMultiThreaded(g); - } catch (ParseException e) { - logger.error("[Templator.java] ParseException at '" + e.getMessage() + "'", e); - } - } - // build pairs <String,POStag> from tagged - Hashtable<String,String> postable = new Hashtable<String,String>(); - for (String st : newtagged.split(" ")) { + parser.parseMultiThreaded(newtagged,g); + + if (parser.getDerivationTrees().isEmpty()) { + 
parser.clear(g,parser.getTemps()); + clearAgain = false; + logger.error("[Templator.java] '" + s + "' could not be parsed."); + } + else { + try { + parser.buildDerivedTreesMultiThreaded(g); + } catch (ParseException e) { + logger.error("[Templator.java] ParseException at '" + e.getMessage() + "'", e); + } + } + + // build pairs <String,POStag> from tagged + Hashtable<String,String> postable = new Hashtable<String,String>(); + for (String st : newtagged.split(" ")) { postable.put(st.substring(0,st.indexOf("/")).toLowerCase(),st.substring(st.indexOf("/")+1));; } - // - - drses = new HashSet<DRS>(); - templates = new HashSet<Template>(); - -// ExecutorService threadPool = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()); -// for (Dude dude : p.getDudes()) { -// threadPool.execute(new DudeProcessor(dude, postable)); -// } -// threadPool.shutdown(); -// while(!threadPool.isTerminated()){} - - for (Dude dude : parser.getDudes()) { - - UDRS udrs = d2u.convert(dude); - if (udrs != null) { - - for (DRS drs : udrs.initResolve()) { - - List<Slot> slots = new ArrayList<Slot>(); - slots.addAll(dude.getSlots()); - d2s.setSlots(slots); - d2s.redundantEqualRenaming(drs); - - if (!containsModuloRenaming(drses,drs)) { -// // DEBUG - logger.debug(dude); - logger.debug(drs); - for (Slot sl : slots) { - logger.debug(sl.toString()); - } -// // - drses.add(drs); - - try { - Template temp = d2s.convert(drs,slots); - if (temp == null) { continue; } - temp = temp.checkandrefine(); - if (temp == null) { continue; } - - - if (USE_WORDNET) { // find WordNet synonyms - List<String> newwords; - String word; - String pos; - for (Slot slot : temp.getSlots()) { - if (!slot.getWords().isEmpty()) { - - word = slot.getWords().get(0); - pos = postable.get(word.toLowerCase().replace(" ","_")); - - POS wordnetpos = null; - if (pos != null) { - if (equalsOneOf(pos,noun)) { - wordnetpos = POS.NOUN; - } - else if (equalsOneOf(pos,adjective)) { - wordnetpos = POS.ADJECTIVE; - } - 
else if (equalsOneOf(pos,verb)) { - wordnetpos = POS.VERB; - } - } - - List<String> strings = new ArrayList<String>(); - if (wordnetpos != null && wordnetpos.equals(POS.ADJECTIVE)) { - strings = wordnet.getAttributes(word); - } - - newwords = new ArrayList<String>(); - newwords.addAll(slot.getWords()); - newwords.addAll(strings); - - if (wordnetpos != null && !slot.getSlotType().equals(SlotType.RESOURCE)) { - newwords.addAll(wordnet.getBestSynonyms(wordnetpos,getLemmatizedWord(word))); - for (String att : getLemmatizedWords(strings)) { - newwords.addAll(wordnet.getBestSynonyms(wordnetpos,att)); - } - } - if (newwords.isEmpty()) { - newwords.add(slot.getWords().get(0)); - } - List<String> newwordslist = new ArrayList<String>(); - newwordslist.addAll(newwords); - slot.setWords(newwordslist); - } - } - } - // - - templates.add(temp); - } catch (java.lang.ClassCastException e) { - continue; - } - if (ONE_SCOPE_ONLY) { break; } - } - } - + // + + drses = new HashSet<DRS>(); + templates = new HashSet<Template>(); + + // ExecutorService threadPool = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()); + // for (Dude dude : p.getDudes()) { + // threadPool.execute(new DudeProcessor(dude, postable)); + // } + // threadPool.shutdown(); + // while(!threadPool.isTerminated()){} + + for (Dude dude : parser.getDudes()) { + + UDRS udrs = d2u.convert(dude); + if (udrs != null) { + + for (DRS drs : udrs.initResolve()) { + + List<Slot> slots = new ArrayList<Slot>(); + slots.addAll(dude.getSlots()); + d2s.setSlots(slots); + d2s.redundantEqualRenaming(drs); + + if (!containsModuloRenaming(drses,drs)) { + // // DEBUG + logger.debug(dude); + logger.debug(drs); + for (Slot sl : slots) { + logger.debug(sl.toString()); + } + // // + drses.add(drs); + + try { + Template temp = d2s.convert(drs,slots); + if (temp == null) { continue; } + temp = temp.checkandrefine(); + if (temp == null) { continue; } + + + if (USE_WORDNET) { // find WordNet synonyms + List<String> 
newwords; + String word; + String pos; + for (Slot slot : temp.getSlots()) { + if (!slot.getWords().isEmpty()) { + + word = slot.getWords().get(0); + pos = postable.get(word.toLowerCase().replace(" ","_")); + + POS wordnetpos = null; + if (pos != null) { + if (equalsOneOf(pos,noun)) { + wordnetpos = POS.NOUN; + } + else if (equalsOneOf(pos,adjective)) { + wordnetpos = POS.ADJECTIVE; + } + else if (equalsOneOf(pos,verb)) { + wordnetpos = POS.VERB; + } + } + + List<String> strings = new ArrayList<String>(); + if(createSynonyms) + { + if (wordnetpos != null && wordnetpos.equals(POS.ADJECTIVE)) { + strings = wordnet.getAttributes(word); + } + } + newwords = new ArrayList<String>(); + newwords.addAll(slot.getWords()); + newwords.addAll(strings); + + if(createSynonyms) + { + if (wordnetpos != null && !slot.getSlotType().equals(SlotType.RESOURCE)) { + newwords.addAll(wordnet.getBestSynonyms(wordnetpos,getLemmatizedWord(word))); + for (String att : getLemmatizedWords(strings)) { + newwords.addAll(wordnet.getBestSynonyms(wordnetpos,att)); + } + } + } + if (newwords.isEmpty()) { + newwords.add(slot.getWords().get(0)); + } + List<String> newwordslist = new ArrayList<String>(); + newwordslist.addAll(newwords); + slot.setWords(newwordslist); + } + } + } + // + + templates.add(temp); + } catch (java.lang.ClassCastException e) { + continue; + } + if (ONE_SCOPE_ONLY) { break; } + } + } + + } + } + + + if (clearAgain) { + parser.clear(g,parser.getTemps()); + } + // System.gc(); + + return templates; } - } - - - if (clearAgain) { - parser.clear(g,parser.getTemps()); - } -// System.gc(); - - return templates; - } - + public String getTaggedInput() { return taggedInput; } - + public List<String> getUnknownWords(){ return parser.getUnknownWords(); } - + private List<String> getLemmatizedWords(List<String> words){ List<String> stemmed = new ArrayList<String>(); for(String word : words){ @@ -472,15 +483,15 @@ } else { stemmed.add(getLemmatizedWord(word)); } - + } return stemmed; } - + 
private String getLemmatizedWord(String word){ return lem.stem(word); } - + private boolean containsModuloRenaming(Set<DRS> drses, DRS drs) { for (DRS d : drses) { @@ -490,7 +501,7 @@ } return false; } - + private boolean equalsOneOf(String string,String[] strings) { for (String s : strings) { if (string.equals(s)) { @@ -499,30 +510,30 @@ } return false; } - + private String extractSentence(String taggedSentence){ - int pos = taggedSentence.indexOf("/"); - while(pos != -1){ - String first = taggedSentence.substring(0, pos); - int endPos = taggedSentence.substring(pos).indexOf(" "); - if(endPos == -1){ - endPos = taggedSentence.substring(pos).length(); - } - String rest = taggedSentence.substring(pos + endPos); - - taggedSentence = first + rest; - pos = taggedSentence.indexOf("/"); - - } - return taggedSentence; - - } - + int pos = taggedSentence.indexOf("/"); + while(pos != -1){ + String first = taggedSentence.substring(0, pos); + int endPos = taggedSentence.substring(pos).indexOf(" "); + if(endPos == -1){ + endPos = taggedSentence.substring(pos).length(); + } + String rest = taggedSentence.substring(pos + endPos); + + taggedSentence = first + rest; + pos = taggedSentence.indexOf("/"); + + } + return taggedSentence; + + } + class DudeProcessor implements Runnable{ - + private Dude dude; private Hashtable<String,String> postable; - + public DudeProcessor(Dude dude, Hashtable<String,String> postable) { this.dude = dude; this.postable = postable; @@ -530,94 +541,94 @@ @Override public void run() { - UDRS udrs = d2u.convert(dude); - if (udrs != null) { - - for (DRS drs : udrs.initResolve()) { - - List<Slot> slots = new ArrayList<Slot>(); - slots.addAll(dude.getSlots()); - d2s.setSlots(slots); - d2s.redundantEqualRenaming(drs); - - if (!containsModuloRenaming(drses,drs)) { -// // DEBUG - if (VERBOSE) { - logger.debug(dude); - logger.debug(drs); - for (Slot sl : slots) { - logger.debug(sl.toString()); - } - } -// // - drses.add(drs); - - try { - Template temp = 
d2s.convert(drs,slots); - temp = temp.checkandrefine(); - if (temp == null) { - continue; - } - - if (USE_WORDNET) { // find WordNet synonyms - List<String> newwords; - String word; - String pos; - for (Slot slot : temp.getSlots()) { - if (!slot.getWords().isEmpty()) { - - word = slot.getWords().get(0); - pos = postable.get(word.toLowerCase().replace(" ","_")); - - POS wordnetpos = null; - if (pos != null) { - if (equalsOneOf(pos,noun)) { - wordnetpos = POS.NOUN; - } - else if (equalsOneOf(pos,adjective)) { - wordnetpos = POS.ADJECTIVE; - } - else if (equalsOneOf(pos,verb)) { - wordnetpos = POS.VERB; - } - } - - List<String> strings = new ArrayList<String>(); - if (wordnetpos != null && wordnetpos.equals(POS.ADJECTIVE)) { - strings = wordnet.getAttributes(word); - } - - newwords = new ArrayList<String>(); - newwords.addAll(slot.getWords()); - newwords.addAll(strings); - - if (wordnetpos != null && !slot.getSlotType().equals(SlotType.RESOURCE)) { - newwords.addAll(wordnet.getBestSynonyms(wordnetpos,getLemmatizedWord(word))); - for (String att : getLemmatizedWords(strings)) { - newwords.addAll(wordnet.getBestSynonyms(wordnetpos,att)); - } - } - if (newwords.isEmpty()) { - newwords.add(slot.getWords().get(0)); - } - List<String> newwordslist = new ArrayList<String>(); - newwordslist.addAll(newwords); - slot.setWords(newwordslist); - } - } - } - // - - templates.add(temp); - } catch (java.lang.ClassCastException e) { - continue; - } - if (ONE_SCOPE_ONLY) { break; } - } - } - } + UDRS udrs = d2u.convert(dude); + if (udrs != null) { + + for (DRS drs : udrs.initResolve()) { + + List<Slot> slots = new ArrayList<Slot>(); + slots.addAll(dude.getSlots()); + d2s.setSlots(slots); + d2s.redundantEqualRenaming(drs); + + if (!containsModuloRenaming(drses,drs)) { + // // DEBUG + if (VERBOSE) { + logger.debug(dude); + logger.debug(drs); + for (Slot sl : slots) { + logger.debug(sl.toString()); + } + } + // // + drses.add(drs); + + try { + Template temp = d2s.convert(drs,slots); + 
temp = temp.checkandrefine(); + if (temp == null) { + continue; + } + + if (USE_WORDNET) { // find WordNet synonyms + List<String> newwords; + String word; + String pos; + for (Slot slot : temp.getSlots()) { + if (!slot.getWords().isEmpty()) { + + word = slot.getWords().get(0); + pos = postable.get(word.toLowerCase().replace(" ","_")); + + POS wordnetpos = null; + if (pos != null) { + if (equalsOneOf(pos,noun)) { + wordnetpos = POS.NOUN; + } + else if (equalsOneOf(pos,adjective)) { + wordnetpos = POS.ADJECTIVE; + } + else if (equalsOneOf(pos,verb)) { + wordnetpos = POS.VERB; + } + } + + List<String> strings = new ArrayList<String>(); + if (wordnetpos != null && wordnetpos.equals(POS.ADJECTIVE)) { + strings = wordnet.getAttributes(word); + } + + newwords = new ArrayList<String>(); + newwords.addAll(slot.getWords()); + newwords.addAll(strings); + + if (wordnetpos != null && !slot.getSlotType().equals(SlotType.RESOURCE)) { + newwords.addAll(wordnet.getBestSynonyms(wordnetpos,getLemmatizedWord(word))); + for (String att : getLemmatizedWords(strings)) { + newwords.addAll(wordnet.getBestSynonyms(wordnetpos,att)); + } + } + if (newwords.isEmpty()) { + newwords.add(slot.getWords().get(0)); + } + List<String> newwordslist = new ArrayList<String>(); + newwordslist.addAll(newwords); + slot.setWords(newwordslist); + } + } + } + // + + templates.add(temp); + } catch (java.lang.ClassCastException e) { + continue; + } + if (ONE_SCOPE_ONLY) { break; } + } + } + } } - + } } Modified: branches/hmm/components-ext/src/main/java/org/dllearner/common/index/HierarchicalIndex.java =================================================================== --- branches/hmm/components-ext/src/main/java/org/dllearner/common/index/HierarchicalIndex.java 2012-09-20 15:44:22 UTC (rev 3847) +++ branches/hmm/components-ext/src/main/java/org/dllearner/common/index/HierarchicalIndex.java 2012-09-21 14:12:34 UTC (rev 3848) @@ -1,6 +1,8 @@ package org.dllearner.common.index; import java.util.ArrayList; 
+import java.util.Collection; +import java.util.Collections; import java.util.List; public class HierarchicalIndex implements Index{ @@ -48,9 +50,15 @@ @Override public IndexResultSet getResourcesWithScores(String queryString, int limit, int offset) { - IndexResultSet rs = primaryIndex.getResourcesWithScores(queryString, limit, offset); + return getResourcesWithScores(queryString, limit, DEFAULT_OFFSET,Collections.<String>emptyList()); + } + + @Override public IndexResultSet getResourcesWithScores(String queryString, int limit, int offset, + Collection<String> additionalFields) + { + IndexResultSet rs = primaryIndex.getResourcesWithScores(queryString, limit, offset, additionalFields); if(rs.getItems().size() < limit){ - rs.add(secondaryIndex.getResourcesWithScores(queryString, limit-rs.getItems().size(), offset)); + rs.add(secondaryIndex.getResourcesWithScores(queryString, limit-rs.getItems().size(), offset,additionalFields)); } return rs; } Modified: branches/hmm/components-ext/src/main/java/org/dllearner/common/index/Index.java =================================================================== --- branches/hmm/components-ext/src/main/java/org/dllearner/common/index/Index.java 2012-09-20 15:44:22 UTC (rev 3847) +++ branches/hmm/components-ext/src/main/java/org/dllearner/common/index/Index.java 2012-09-21 14:12:34 UTC (rev 3848) @@ -1,5 +1,6 @@ package org.dllearner.common.index; +import java.util.Collection; import java.util.List; import java.util.Map; @@ -10,4 +11,5 @@ IndexResultSet getResourcesWithScores(String queryString); IndexResultSet getResourcesWithScores(String queryString, int limit); IndexResultSet getResourcesWithScores(String queryString, int limit, int offset); + IndexResultSet getResourcesWithScores(String queryString, int limit, int offset, Collection<String> additionalFields); } Modified: branches/hmm/components-ext/src/main/java/org/dllearner/common/index/IndexResultItem.java =================================================================== 
--- branches/hmm/components-ext/src/main/java/org/dllearner/common/index/IndexResultItem.java 2012-09-20 15:44:22 UTC (rev 3847) +++ branches/hmm/components-ext/src/main/java/org/dllearner/common/index/IndexResultItem.java 2012-09-21 14:12:34 UTC (rev 3848) @@ -1,26 +1,34 @@ package org.dllearner.common.index; +import java.util.Collections; +import java.util.Map; + public class IndexResultItem { + private final String uri; + private final String label; + private final float score; + private final Map<String,? extends Object> fields; - private String uri; - private String label; - private float score; + public IndexResultItem(String uri, String label, float score) + {this(uri,label,score,Collections.<String,Object>emptyMap());} - public IndexResultItem(String uri, String label, float score) { + public IndexResultItem(String uri, String label, float score,Map<String,? extends Object> fields) + { this.uri = uri; this.label = label; this.score = score; + if(fields==null) throw new AssertionError("fields null"); + this.fields = fields; } - public String getUri() { - return uri; - } + public String getUri() {return uri;} + public String getLabel() {return label; } + public float getScore() {return score;} + public Map<String,? 
extends Object> getFields() {return fields;} - public String getLabel() { - return label; + @Override public String toString() + { + // TODO Auto-generated method stub + return "label:" + label + "--uri:" + uri + "--fields:" + fields; } - - public float getScore() { - return score; - } } Modified: branches/hmm/components-ext/src/main/java/org/dllearner/common/index/SOLRIndex.java =================================================================== --- branches/hmm/components-ext/src/main/java/org/dllearner/common/index/SOLRIndex.java 2012-09-20 15:44:22 UTC (rev 3847) +++ branches/hmm/components-ext/src/main/java/org/dllearner/common/index/SOLRIndex.java 2012-09-21 14:12:34 UTC (rev 3848) @@ -2,8 +2,11 @@ import java.net.MalformedURLException; import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; import java.util.List; - +import java.util.Map; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrQuery.ORDER; import org.apache.solr.client.solrj.SolrServerException; @@ -15,19 +18,19 @@ import org.apache.solr.common.params.ModifiableSolrParams; public class SOLRIndex implements Index{ - -private CommonsHttpSolrServer server; - + + private CommonsHttpSolrServer server; + private static final int DEFAULT_LIMIT = 10; private static final int DEFAULT_OFFSET = 0; - + private String primarySearchField; private String secondarySearchField; - + private String sortField; - + private boolean restrictiveSearch = true; - + public SOLRIndex(String solrServerURL){ try { server = new CommonsHttpSolrServer(solrServerURL); @@ -36,20 +39,20 @@ e.printStackTrace(); } } - + public void setSearchFields(String primarySearchField, String secondarySearchField){ this.primarySearchField = primarySearchField; this.secondarySearchField = secondarySearchField; } - + public void setPrimarySearchField(String primarySearchField) { this.primarySearchField = primarySearchField; } - + public void 
setSecondarySearchField(String secondarySearchField) { this.secondarySearchField = secondarySearchField; } - + @Override public List<String> getResources(String queryString) { return getResources(queryString, DEFAULT_LIMIT); @@ -91,9 +94,12 @@ } @Override - public IndexResultSet getResourcesWithScores(String queryString, int limit, int offset) { - IndexResultSet rs = new IndexResultSet(); - + public IndexResultSet getResourcesWithScores(String queryString, int limit, int offset) + {return getResourcesWithScores(queryString,limit,offset,Collections.<String>emptyList());} + + public IndexResultSet getResourcesWithScores(String queryString, int limit, int offset, Collection<String> additionalFields) + { + IndexResultSet rs = new IndexResultSet(); QueryResponse response; try { String solrString = queryString; @@ -112,36 +118,43 @@ } solrString += ")"; } - + } else { solrString += queryString; } } SolrQuery query = new SolrQuery(solrString); - query.setRows(limit); - query.setStart(offset); - if(sortField != null){ - query.addSortField(sortField, ORDER.desc); - } - query.addField("score"); + query.setRows(limit); + query.setStart(offset); + if(sortField != null){ + query.addSortField(sortField, ORDER.desc); + } + query.addField("score"); response = server.query(query); SolrDocumentList docList = response.getResults(); - - for(SolrDocument d : docList){ + + for(SolrDocument d : docList) + { + Map<String,Object> fields = new HashMap<String,Object>(); + for(String field: additionalFields) + { + Object o = d.get(field); + if(o!=null) {fields.put(field,o);} + } float score = 0; if(d.get("score") instanceof ArrayList){ score = ((Float)((ArrayList)d.get("score")).get(1)); } else { score = (Float) d.get("score"); } - rs.addItem(new IndexResultItem((String) d.get("uri"), (String) d.get("label"), score)); + rs.addItem(new IndexResultItem((String) d.get("uri"), (String) d.get("label"), score,fields)); } } catch (SolrServerException e) { e.printStackTrace(); } return rs; } - + 
public void setSortField(String sortField){ this.sortField = sortField; } Modified: branches/hmm/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java =================================================================== --- branches/hmm/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java 2012-09-20 15:44:22 UTC (rev 3847) +++ branches/hmm/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java 2012-09-21 14:12:34 UTC (rev 3848) @@ -1,6 +1,7 @@ package org.dllearner.common.index; import java.util.ArrayList; +import java.util.Collection; import java.util.List; import org.dllearner.kb.sparql.ExtractionDBCache; @@ -150,5 +151,11 @@ public Model getModel() { return model; } + + @Override public IndexResultSet getResourcesWithScores(String queryString, int limit, int offset, + Collection<String> additionalFields) + { + throw new UnsupportedOperationException("TODO: implement this later"); + } } Modified: branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java =================================================================== --- branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-09-20 15:44:22 UTC (rev 3847) +++ branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-09-21 14:12:34 UTC (rev 3848) @@ -806,7 +806,7 @@ Index propertiesIndex = new SOLRIndex("http://dbpedia.aksw.org:8080/solr/dbpedia_properties"); SOLRIndex boa_propertiesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/boa_fact_detail"); boa_propertiesIndex.setSortField("boa-score"); - propertiesIndex = new HierarchicalIndex(boa_propertiesIndex, propertiesIndex); +// propertiesIndex = new HierarchicalIndex(boa_propertiesIndex, propertiesIndex); MappingBasedIndex mappingIndex= new MappingBasedIndex( 
SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("test/dbpedia_class_mappings.txt").getPath(), SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("test/dbpedia_resource_mappings.txt").getPath(), This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ki...@us...> - 2012-09-26 14:44:53
|
Revision: 3849 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3849&view=rev Author: kirdie Date: 2012-09-26 14:44:42 +0000 (Wed, 26 Sep 2012) Log Message: ----------- test case now successfully disambiguates the Mean Hamster Software example from the hmm paper. Modified Paths: -------------- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java Modified: branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-09-21 14:12:34 UTC (rev 3848) +++ branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-09-26 14:44:42 UTC (rev 3849) @@ -71,17 +71,21 @@ import com.jamonapi.Monitor; import com.jamonapi.MonitorFactory; -public class SPARQLTemplateBasedLearner2 implements SparqlQueryLearningAlgorithm{ - - enum Mode{ - BEST_QUERY, BEST_NON_EMPTY_QUERY - } - +/** The old learner taken over by Konrad Höffner for experiments with the Hidden Markov Algorithm by Saedeeh Shekarpur. + * + * */ +public class SPARQLTemplateBasedLearner2 implements SparqlQueryLearningAlgorithm +{ + enum Mode {BEST_QUERY, BEST_NON_EMPTY_QUERY} private Mode mode = Mode.BEST_QUERY; + + /** used to create a label out of the URI when there is no label available in the SPARQL endpoint.*/ private static SimpleIRIShortFormProvider sfp = new SimpleIRIShortFormProvider(); private static final Logger logger = Logger.getLogger(SPARQLTemplateBasedLearner2.class); + /** synonyms are great but are not used yet by the HMM algorithm. 
**/ private static final boolean CREATE_SYNONYMS = false; + /** The minimum score of items that are accepted from the Sindice search BOA index. **/ private static final Double BOA_THRESHOLD = 0.9; private Monitor templateMon = MonitorFactory.getTimeMonitor("template"); private Monitor sparqlMon = MonitorFactory.getTimeMonitor("sparql"); @@ -511,8 +515,8 @@ keywords.add(slot.getWords().get(0)); } if(template.getSlots().size()!=3) {continue;} - if(!keywords.contains("Mean Hamster Software")) {continue;} - if(!keywords.contains("published")) {continue;} +// if(!keywords.contains("Mean Hamster Software")) {continue;} +// if(!keywords.contains("published")) {continue;} System.out.println("\"keywords\": "+keywords); } System.out.println(template); @@ -542,7 +546,8 @@ for(String word: slot.getWords()) {max = Math.max(max, Similarity.getSimilarity(word, info.getLabel()));} if(max<0||max>1) throw new AssertionError("max is not in [0,1], max="+max); info.setStringSimilarityScore(max); - + if(!info.setTypeFromDBpediaURI()) throw new AssertionError("could not set type for info "+info); + System.err.println("info with type: "+info); resourceInfos.add(info); } segmentToURIs.put(segment,resourceInfos); Modified: branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java =================================================================== --- branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-09-21 14:12:34 UTC (rev 3848) +++ branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-09-26 14:44:42 UTC (rev 3849) @@ -105,7 +105,7 @@ private static final boolean DBPEDIA_PRETAGGED = true; private static final boolean OXFORD_PRETAGGED = false; - /*@Test*/ public void testDBpedia() throws Exception + @Test public void testDBpedia() throws Exception { File file = generateTestDataIfNecessary( new 
File(getClass().getClassLoader().getResource("tbsl/evaluation/qald2-dbpedia-train-tagged(ideal).xml").getFile()), @@ -162,7 +162,7 @@ /** For debugging one question in particular. */ - @Test public void testSingleQueryDBpedia() + /*@Test*/ public void testSingleQueryDBpedia() { // Logger.getLogger(Templator.class).setLevel(Level.DEBUG); // Logger.getLogger(Parser.class).setLevel(Level.DEBUG); @@ -681,7 +681,7 @@ return testData; } - /** + /** Updates question file by removing questions without nonempty resource list answer and adding answers. * @param file * @param updatedFile * @throws ParserConfigurationException This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ki...@us...> - 2012-10-30 16:32:53
|
Revision: 3862 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3862&view=rev Author: kirdie Date: 2012-10-30 16:32:40 +0000 (Tue, 30 Oct 2012) Log Message: ----------- fixed oxford testing. Modified Paths: -------------- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java branches/hmm/components-ext/src/main/resources/tbsl/evaluation/oxford_working_questions.xml branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/QueryTestData.java branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java Added Paths: ----------- branches/hmm/components-ext/src/main/resources/tbsl/evaluation/oxford_working_questions_justquestions.xml Modified: branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-10-30 16:10:36 UTC (rev 3861) +++ branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-10-30 16:32:40 UTC (rev 3862) @@ -90,15 +90,13 @@ import com.jamonapi.MonitorFactory; /** The old learner taken over by Konrad Höffner for experiments with the Hidden Markov Algorithm by Saedeeh Shekarpur. - * * */ public class SPARQLTemplateBasedLearner2 implements SparqlQueryLearningAlgorithm { - private static final boolean USE_HMM = false; /** synonyms are great but are not used yet by the HMM algorithm. **/ - private static final boolean HMM_USE_SYNONYMS = false; + private static final boolean HMM_USE_SYNONYMS = true; /** The minimum score of items that are accepted from the Sindice search BOA index. 
**/ - private static final Double BOA_THRESHOLD = 0.9; + private static final Double BOA_THRESHOLD = 0.5; enum Mode {BEST_QUERY, BEST_NON_EMPTY_QUERY} private Mode mode = Mode.BEST_QUERY; @@ -394,35 +392,37 @@ // templateMon.reset(); // sparqlMon.reset(); } - - public void learnSPARQLQueries() throws NoTemplateFoundException{ + public void learnSPARQLQueries() throws NoTemplateFoundException + { + learnSPARQLQueries(false); + } + + public void learnSPARQLQueries(boolean useHMM) throws NoTemplateFoundException + { reset(); //generate SPARQL query templates logger.debug("Generating SPARQL query templates..."); templateMon.start(); if(multiThreaded){ - templates = templateGenerator.buildTemplatesMultiThreaded(question,!USE_HMM||HMM_USE_SYNONYMS); + templates = templateGenerator.buildTemplatesMultiThreaded(question,!useHMM||HMM_USE_SYNONYMS); } else { templates = templateGenerator.buildTemplates(question); } templateMon.stop(); logger.debug("Done in " + templateMon.getLastValue() + "ms."); relevantKeywords.addAll(templateGenerator.getUnknownWords()); - if(templates.isEmpty()){ - throw new NoTemplateFoundException(); + if(templates.isEmpty()){throw new NoTemplateFoundException();} +// logger.debug("Templates:"); +// for(Template t : templates){ +// logger.debug(t); +// } - } - logger.debug("Templates:"); - for(Template t : templates){ - logger.debug(t); - } - //get the weighted query candidates - generatedQueries = getWeightedSPARQLQueries(templates,USE_HMM); + generatedQueries = getWeightedSPARQLQueries(templates,useHMM); sparqlQueryCandidates = new ArrayList<WeightedQuery>(); int i = 0; for(WeightedQuery wQ : generatedQueries){ - logger.debug(wQ.explain()); + logger.trace(wQ.explain()); sparqlQueryCandidates.add(wQ); if(i == maxTestedQueries){ break; @@ -526,52 +526,61 @@ private SortedSet<WeightedQuery> getWeightedSPARQLQueriesWithHMM(Set<Template> templates) { - // for testing + List<String> vars = new LinkedList<String>(); + if(templates.isEmpty()) throw new 
AssertionError("no templates"); + SortedSet<WeightedQuery> queries = new TreeSet<WeightedQuery>(); for(Template template: templates) { { ArrayList<String> keywords = new ArrayList<String>(); for(Slot slot: template.getSlots()) { - keywords.add(slot.getWords().get(0)); + if(!slot.getWords().isEmpty()) + { + // we don't have synonyms for hmm at the moment, so there should be just one word + if(slot.getWords().size()!=1) throw new AssertionError("more than one word with hmm for slot: "+slot.getWords()); + keywords.add(slot.getWords().get(0)); + vars.add(slot.getAnchor()); + } } - if(template.getSlots().size()!=3) {continue;} // if(!keywords.contains("Mean Hamster Software")) {continue;} // if(!keywords.contains("published")) {continue;} - System.out.println("\"keywords\": "+keywords); + logger.debug("\"keywords\": "+keywords); } - System.out.println(template); - SortedSet<WeightedQuery> queries = new TreeSet<WeightedQuery>(); + System.out.println(template); Query query = template.getQuery(); double score = 0; Map<List<String>,List<ResourceInfo>> segmentToURIs = new HashMap<List<String>,List<ResourceInfo>>(); - Map<String,IndexResultItem> uriUniqueToResultItem = new HashMap<String,IndexResultItem>(); +// Map<String,IndexResultItem> uriUniqueToResultItem = new HashMap<String,IndexResultItem>(); for(Slot slot: template.getSlots()) { - List<String> segment = new LinkedList<String>(); - segment.addAll(Arrays.asList(slot.getWords().get(0).split("\\s"))); - List<ResourceInfo> resourceInfos = new LinkedList<ResourceInfo>(); + if(!slot.getWords().isEmpty()){ + List<String> segment = new LinkedList<String>(); + segment.addAll(Arrays.asList(slot.getWords().get(0).split("\\s"))); + List<ResourceInfo> resourceInfos = new LinkedList<ResourceInfo>(); - for(IndexResultItem item : getIndexResultItems(slot)) - { - // if this gets used at another place, create a function IndexResultItemToResourceInfo() - ResourceInfo info = new ResourceInfo(); - info.setUri(item.getUri()); - String 
label = item.getLabel(); - // in dbpedia, the last part of the uri is transformed from the english label, reverse the transformation (should almost always work for dbpedia article resources) - info.setLabel(label!=null?label:sfp.getShortForm(IRI.create(item.getUri()))); - // in saedeehs algorithm, the emission probabilty is formed by the string similarity - // but we use the lucene index score - double max = 0; - for(String word: slot.getWords()) {max = Math.max(max, Similarity.getSimilarity(word, info.getLabel()));} - if(max<0||max>1) throw new AssertionError("max is not in [0,1], max="+max); - info.setStringSimilarityScore(max); - if(!info.setTypeFromDBpediaURI()) throw new AssertionError("could not set type for info "+info); - System.err.println("info with type: "+info); - resourceInfos.add(info); + for(IndexResultItem item : getIndexResultItems(slot)) + { + // if this gets used at another place, create a function IndexResultItemToResourceInfo() + ResourceInfo info = new ResourceInfo(); + info.setUri(item.getUri()); + String label = item.getLabel(); + // in dbpedia, the last part of the uri is transformed from the english label, reverse the transformation (should almost always work for dbpedia article resources) + info.setLabel(label!=null?label:sfp.getShortForm(IRI.create(item.getUri()))); + // in saedeehs algorithm, the emission probabilty is formed by the string similarity + // but we use the lucene index score + double max = 0; + for(String word: slot.getWords()) {max = Math.max(max, Similarity.getSimilarity(word, info.getLabel()));} + if(max<0||max>1) throw new AssertionError("max is not in [0,1], max="+max); + info.setStringSimilarityScore(max); + if(!info.setTypeFromDBpediaURI()) throw new AssertionError("could not set type for info "+info); + System.err.println("info with type: "+info); + resourceInfos.add(info); + } + segmentToURIs.put(segment,resourceInfos); } - segmentToURIs.put(segment,resourceInfos); + } HiddenMarkovModel hmm = new 
HiddenMarkovModel(); hmm.initialization(); @@ -595,10 +604,10 @@ System.out.println(q.getVariablesAsStringList()); System.out.println(); int i = 0; - for(String var : q.getVariablesAsStringList()) - { - q.replaceVarWithURI(var, path.get(i)); - i++; + for(String uri : path){ + uri = uri.trim(); + String var = vars.get(path.indexOf(uri)); + q.replaceVarWithURI(var, uri); } System.out.println(q); @@ -607,8 +616,7 @@ queries.add(wQuery); } } - //System.exit(0); - return queries; + //System.exit(0); // >> SLOTS: // y0: RESOURCE {Mean Hamster Software} // p0: OBJECTPROPERTY {published,print} @@ -618,10 +626,10 @@ // System.out.println(template); } // - return null; + return queries; } - private SortedSet<WeightedQuery> getWeightedSPARQLQueriesWithoutHMM(Set<Template> templates){ + @SuppressWarnings("unused") private SortedSet<WeightedQuery> getWeightedSPARQLQueriesWithoutHMM(Set<Template> templates){ logger.debug("Generating SPARQL query candidates..."); Map<Slot, Set<Allocation>> slot2Allocations = new TreeMap<Slot, Set<Allocation>>(new Comparator<Slot>() { @@ -640,11 +648,10 @@ SortedSet<WeightedQuery> allQueries = new TreeSet<WeightedQuery>(); - Set<Allocation> allocations; - - for(Template t : templates){ - logger.info("Processing template:\n" + t.toString()); - allocations = new TreeSet<Allocation>(); + for(Template t : templates) + { + logger.info("Processing template:\n" + t.toString()); +// Set<Allocation> allocations = new TreeSet<Allocation>(); boolean containsRegex = t.getQuery().toString().toLowerCase().contains("(regex("); ExecutorService executor = Executors.newFixedThreadPool(t.getSlots().size()); @@ -653,7 +660,7 @@ long startTime = System.currentTimeMillis(); for (Slot slot : t.getSlots()) { - if(!slot2Allocations.containsKey(slot)){//System.out.println(slot + ": " + slot.hashCode());System.out.println(slot2Allocations); + if(!slot2Allocations.containsKey(slot)){ Callable<Map<Slot, SortedSet<Allocation>>> worker = new SlotProcessor(slot); 
Future<Map<Slot, SortedSet<Allocation>>> submit = executor.submit(worker); list.add(submit); @@ -668,7 +675,8 @@ } catch (InterruptedException e) { e.printStackTrace(); } catch (ExecutionException e) { - e.printStackTrace(); +// e.printStackTrace(); + throw new RuntimeException(e); } } @@ -734,7 +742,7 @@ queries.clear(); queries.addAll(tmp); tmp.clear(); - } + } for(Slot slot : sortedSlots){ if(!slot2Allocations.get(slot).isEmpty()){ @@ -743,104 +751,104 @@ Query q = new Query(query.getQuery()); boolean drop = false; - if(useDomainRangeRestriction){ - if(slot.getSlotType() == SlotType.PROPERTY || slot.getSlotType() == SlotType.SYMPROPERTY){ - for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ - String objectVar = triple.getValue().getName(); - String subjectVar = triple.getVariable().getName(); - // System.out.println(triple); - for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(objectVar)){ - // System.out.println(typeTriple); - if(true){//reasoner.isObjectProperty(a.getUri())){ - Description range = reasoner.getRange(new ObjectProperty(a.getUri())); - // System.out.println(a); - if(range != null){ - Set<Description> allRanges = new HashSet<Description>(); - SortedSet<Description> superClasses; - if(range instanceof NamedClass){ - superClasses = reasoner.getSuperClasses(range); - allRanges.addAll(superClasses); - } else { - for(Description nc : range.getChildren()){ - superClasses = reasoner.getSuperClasses(nc); - allRanges.addAll(superClasses); - } - } - allRanges.add(range); - allRanges.remove(new NamedClass(Thing.instance.getURI())); +// if(useDomainRangeRestriction){ +// if(slot.getSlotType() == SlotType.PROPERTY || slot.getSlotType() == SlotType.SYMPROPERTY){ +// for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ +// String objectVar = triple.getValue().getName(); +// String subjectVar = triple.getVariable().getName(); +// +// for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(objectVar)){ +// +// 
if(true){//reasoner.isObjectProperty(a.getUri())){ +// Description range = reasoner.getRange(new ObjectProperty(a.getUri())); +// +// if(range != null){ +// Set<Description> allRanges = new HashSet<Description>(); +// SortedSet<Description> superClasses; +// if(range instanceof NamedClass){ +// superClasses = reasoner.getSuperClasses(range); +// allRanges.addAll(superClasses); +// } else { +// for(Description nc : range.getChildren()){ +// superClasses = reasoner.getSuperClasses(nc); +// allRanges.addAll(superClasses); +// } +// } +// allRanges.add(range); +// allRanges.remove(new NamedClass(Thing.instance.getURI())); +// +// Set<Description> allTypes = new HashSet<Description>(); +// String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); +// Description type = new NamedClass(typeURI); +// superClasses = reasoner.getSuperClasses(type); +// allTypes.addAll(superClasses); +// allTypes.add(type); +// +// if(!org.mindswap.pellet.utils.SetUtils.intersects(allRanges, allTypes)){ +// drop = true; +// } +// } +// } else { +// drop = true; +// } +// +// } +// for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(subjectVar)){ +// Description domain = reasoner.getDomain(new ObjectProperty(a.getUri())); +// +// if(domain != null){ +// Set<Description> allDomains = new HashSet<Description>(); +// SortedSet<Description> superClasses; +// if(domain instanceof NamedClass){ +// superClasses = reasoner.getSuperClasses(domain); +// allDomains.addAll(superClasses); +// } else { +// for(Description nc : domain.getChildren()){ +// superClasses = reasoner.getSuperClasses(nc); +// allDomains.addAll(superClasses); +// } +// } +// allDomains.add(domain); +// allDomains.remove(new NamedClass(Thing.instance.getURI())); +// +// Set<Description> allTypes = new HashSet<Description>(); +// String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); +// Description type = new NamedClass(typeURI); +// 
superClasses = reasoner.getSuperClasses(type); +// allTypes.addAll(superClasses); +// allTypes.add(type); +// +// if(!org.mindswap.pellet.utils.SetUtils.intersects(allDomains, allTypes)){ +// drop = true; +// } else { +// +// } +// } +// } +// } +// } +// } - Set<Description> allTypes = new HashSet<Description>(); - String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); - Description type = new NamedClass(typeURI); - superClasses = reasoner.getSuperClasses(type); - allTypes.addAll(superClasses); - allTypes.add(type); - - if(!org.mindswap.pellet.utils.SetUtils.intersects(allRanges, allTypes)){ - drop = true; - } - } - } else { - drop = true; - } - - } - for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(subjectVar)){ - Description domain = reasoner.getDomain(new ObjectProperty(a.getUri())); - // System.out.println(a); - if(domain != null){ - Set<Description> allDomains = new HashSet<Description>(); - SortedSet<Description> superClasses; - if(domain instanceof NamedClass){ - superClasses = reasoner.getSuperClasses(domain); - allDomains.addAll(superClasses); - } else { - for(Description nc : domain.getChildren()){ - superClasses = reasoner.getSuperClasses(nc); - allDomains.addAll(superClasses); - } - } - allDomains.add(domain); - allDomains.remove(new NamedClass(Thing.instance.getURI())); - - Set<Description> allTypes = new HashSet<Description>(); - String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); - Description type = new NamedClass(typeURI); - superClasses = reasoner.getSuperClasses(type); - allTypes.addAll(superClasses); - allTypes.add(type); - - if(!org.mindswap.pellet.utils.SetUtils.intersects(allDomains, allTypes)){ - drop = true; - } else { - - } - } - } - } - } - } - if(!drop){ if(slot.getSlotType() == SlotType.RESOURCE){//avoid queries where predicate is data property and object resource->add REGEX filter in this case for(SPARQL_Triple triple : 
q.getTriplesWithVar(slot.getAnchor())){ SPARQL_Value object = triple.getValue(); - if(object.isVariable() && object.getName().equals(slot.getAnchor())){//only consider triple where SLOT is in object position - SPARQL_Property predicate = triple.getProperty(); - if(!predicate.isVariable()){//only consider triple where predicate is URI - String predicateURI = predicate.getName().replace("<", "").replace(">", ""); - if(isDatatypeProperty(predicateURI)){//if data property - q.addFilter(new SPARQL_Filter(new SPARQL_Pair( - object, "'" + slot.getWords().get(0) + "'", SPARQL_PairType.REGEX))); - } else { - q.replaceVarWithURI(slot.getAnchor(), a.getUri()); - } - } else { - q.replaceVarWithURI(slot.getAnchor(), a.getUri()); - } - } else { - q.replaceVarWithURI(slot.getAnchor(), a.getUri()); - } +// if(object.isVariable() && object.getName().equals(slot.getAnchor())){//only consider triple where SLOT is in object position +// SPARQL_Property predicate = triple.getProperty(); +// if(!predicate.isVariable()){//only consider triple where predicate is URI +// String predicateURI = predicate.getName().replace("<", "").replace(">", ""); +// if(isDatatypeProperty(predicateURI)){//if data property +// q.addFilter(new SPARQL_Filter(new SPARQL_Pair( +// object, "'" + slot.getWords().get(0) + "'", SPARQL_PairType.REGEX))); +// } else { +// q.replaceVarWithURI(slot.getAnchor(), a.getUri()); +// } +// } else { +// q.replaceVarWithURI(slot.getAnchor(), a.getUri()); +// } +// } else { +// +// } } } else { q.replaceVarWithURI(slot.getAnchor(), a.getUri()); @@ -864,7 +872,7 @@ } queries.clear(); - queries.addAll(tmp);//System.out.println(tmp); + queries.addAll(tmp); tmp.clear(); } else {//Add REGEX FILTER if resource slot is empty and predicate is datatype property if(slot.getSlotType() == SlotType.RESOURCE){ @@ -952,6 +960,7 @@ } } + for (Iterator<WeightedQuery> iterator = queries.iterator(); iterator.hasNext();) { WeightedQuery wQ = iterator.next(); if(dropZeroScoredQueries){ @@ -966,11 
+975,12 @@ } allQueries.addAll(queries); List<Query> qList = new ArrayList<Query>(); - for(WeightedQuery wQ : queries){//System.err.println(wQ.getQuery()); + for(WeightedQuery wQ : queries){ qList.add(wQ.getQuery()); } template2Queries.put(t, qList); } + logger.debug(allQueries); logger.debug("...done in "); return allQueries; } @@ -1011,14 +1021,13 @@ if(popularity == null){ popularity = Integer.valueOf(0); } - System.out.println(popularity); + logger.trace("popularity: "+popularity); - // if(cnt == 0){ // return 0; // } // return Math.log(cnt); - if(popularity!=popularity) {throw new AssertionError("prominence NaN for uri "+uri+", slot type "+type);} + if(Double.isNaN(popularity)) {throw new AssertionError("prominence NaN for uri "+uri+", slot type "+type);} return popularity; } Modified: branches/hmm/components-ext/src/main/resources/tbsl/evaluation/oxford_working_questions.xml =================================================================== (Binary files differ) Added: branches/hmm/components-ext/src/main/resources/tbsl/evaluation/oxford_working_questions_justquestions.xml =================================================================== (Binary files differ) Property changes on: branches/hmm/components-ext/src/main/resources/tbsl/evaluation/oxford_working_questions_justquestions.xml ___________________________________________________________________ Added: svn:mime-type + application/xml Modified: branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/QueryTestData.java =================================================================== --- branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/QueryTestData.java 2012-10-30 16:10:36 UTC (rev 3861) +++ branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/QueryTestData.java 2012-10-30 16:32:40 UTC (rev 3862) @@ -12,6 +12,7 @@ import java.io.PrintWriter; import java.io.Serializable; import java.util.Collection; +import 
java.util.Collections; import java.util.HashSet; import java.util.Set; import java.util.SortedMap; @@ -36,7 +37,8 @@ public class QueryTestData implements Serializable { - private static final long serialVersionUID = 1L; + private static final long serialVersionUID = 2L; + public boolean hmm = false; public SortedMap<Integer, String> id2Question = new ConcurrentSkipListMap<Integer, String>(); public SortedMap<Integer, String> id2Query = new ConcurrentSkipListMap<Integer, String>(); public SortedMap<Integer, Set<String>> id2Answers = new ConcurrentSkipListMap<Integer, Set<String>>(); @@ -73,138 +75,153 @@ if(!id2Answers.isEmpty()) {throw new AssertionError("Answers already existing.");} for(int i:id2Query.keySet()) { - Set<String> uris = SPARQLTemplateBasedLearner3Test.getUris(endpoint, id2Query.get(i),cache,model); - id2Answers.put(i, uris); // empty answer set better transfers intended meaning and doesn't cause NPEs in html generation :-) - if(!uris.isEmpty()) {/*id2Answers.put(i, uris);*/} - else {id2LearnStatus.put(i, LearnStatus.QUERY_RESULT_EMPTY);} - } - return this; + try + { + Set<String> uris = SPARQLTemplateBasedLearner3Test.getUris(endpoint, id2Query.get(i),cache,model); + // empty answer set better transfers intended meaning and doesn't cause NPEs in html generation :-) + id2Answers.put(i, uris); + if(!uris.isEmpty()) {/*id2Answers.put(i, uris);*/} + else {id2LearnStatus.put(i, LearnStatus.QUERY_RESULT_EMPTY);} + + } + catch(Exception e) + { + id2Answers.put(i, Collections.<String>emptySet()); + id2LearnStatus.put(i, LearnStatus.exceptionStatus(e)); + } } + return this; +} - /** reads test data from a QALD2 benchmark XML file, including questions, queries and answers. - * each question needs to have a query but not necessarily an answer. - * @param file a QALD benchmark XML file - * @param MAX_NUMBER_OF_QUESTIONS the maximum number of questions read from the file. 
- * @return the test data read from the XML file */ - public static QueryTestData readQaldXml(final File file, int MAX_NUMBER_OF_QUESTIONS) - { - QueryTestData testData = new QueryTestData(); - try { - DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); - DocumentBuilder db = dbf.newDocumentBuilder(); - Document doc = db.parse(file); - doc.getDocumentElement().normalize(); - NodeList questionNodes = doc.getElementsByTagName("question"); - int id; +/** reads test data from a QALD2 benchmark XML file, including questions, queries and answers. + * each question needs to have a query but not necessarily an answer. + * @param file a QALD benchmark XML file + * @param MAX_NUMBER_OF_QUESTIONS the maximum number of questions read from the file. + * @return the test data read from the XML file */ +public static QueryTestData readQaldXml(final File file, final int MAX_NUMBER_OF_QUESTIONS, boolean whitelistOnly,Set<Integer> whitelist) +{ + QueryTestData testData = new QueryTestData(); + try { + DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); + DocumentBuilder db = dbf.newDocumentBuilder(); + Document doc = db.parse(file); + doc.getDocumentElement().normalize(); + NodeList questionNodes = doc.getElementsByTagName("question"); + int id; - for(int i = 0; i < questionNodes.getLength(); i++) + for(int i = 0; i < questionNodes.getLength(); i++) + { + if(i>MAX_NUMBER_OF_QUESTIONS) break; + String question; + String query; + Set<String> answers = new HashSet<String>(); + Element questionNode = (Element) questionNodes.item(i); + //read question ID + id = Integer.valueOf(questionNode.getAttribute("id")); + if(whitelistOnly&&!whitelist.contains(id)) {continue;} + + //Read question + question = ((Element)questionNode.getElementsByTagName("string").item(0)).getChildNodes().item(0).getNodeValue().trim(); + //Read SPARQL query + query = ((Element)questionNode.getElementsByTagName("query").item(0)).getChildNodes().item(0).getNodeValue().trim(); + // //Read 
answers + // answers = new HashSet<String>(); + // NodeList aswersNodes = questionNode.getElementsByTagName("answer"); + // for(int j = 0; j < aswersNodes.getLength(); j++){ + // Element answerNode = (Element) aswersNodes.item(j); + // answers.add(((Element)answerNode.getElementsByTagName("uri").item(0)).getChildNodes().item(0).getNodeValue().trim()); + // } + + if(!query.equals("OUT OF SCOPE")) // marker in qald benchmark file, will create holes interval of ids (e.g. 1,2,5,7) { - if(i>MAX_NUMBER_OF_QUESTIONS) break; - String question; - String query; - Set<String> answers = new HashSet<String>(); - Element questionNode = (Element) questionNodes.item(i); - //read question ID - id = Integer.valueOf(questionNode.getAttribute("id")); - //Read question - question = ((Element)questionNode.getElementsByTagName("string").item(0)).getChildNodes().item(0).getNodeValue().trim(); - //Read SPARQL query - query = ((Element)questionNode.getElementsByTagName("query").item(0)).getChildNodes().item(0).getNodeValue().trim(); - // //Read answers - // answers = new HashSet<String>(); - // NodeList aswersNodes = questionNode.getElementsByTagName("answer"); - // for(int j = 0; j < aswersNodes.getLength(); j++){ - // Element answerNode = (Element) aswersNodes.item(j); - // answers.add(((Element)answerNode.getElementsByTagName("uri").item(0)).getChildNodes().item(0).getNodeValue().trim()); - // } - - if(!query.equals("OUT OF SCOPE")) // marker in qald benchmark file, will create holes interval of ids (e.g. 
1,2,5,7) + testData.id2Question.put(id, question); + testData.id2Query.put(id, query); + Element answersElement = (Element) questionNode.getElementsByTagName("answers").item(0); + // some of our qald files were mistakenly created so that they have the "answer" elements directly under the question node + // with no answers element + if(answersElement==null) answersElement = (Element)questionNode; +// if(answersElement!=null) { - testData.id2Question.put(id, question); - testData.id2Query.put(id, query); - Element answersElement = (Element) questionNode.getElementsByTagName("answers").item(0); - if(answersElement!=null) + NodeList answerElements = answersElement.getElementsByTagName("answer"); + for(int j=0; j<answerElements.getLength();j++) { - NodeList answerElements = answersElement.getElementsByTagName("answer"); - for(int j=0; j<answerElements.getLength();j++) - { - String answer = ((Element)answerElements.item(j)).getTextContent(); - answers.add(answer); - } - testData.id2Answers.put(id, answers); + String answer = ((Element)answerElements.item(j)).getTextContent(); + answers.add(answer); } - } - // question2Answers.put(question, answers); + testData.id2Answers.put(id, answers); + } + } + // question2Answers.put(question, answers); - } - } catch (DOMException e) { - e.printStackTrace(); - } catch (ParserConfigurationException e) { - e.printStackTrace(); - } catch (SAXException e) { - e.printStackTrace(); - } catch (IOException e) { - e.printStackTrace(); - } - return testData; - } + } + } catch (DOMException e) { + e.printStackTrace(); + } catch (ParserConfigurationException e) { + e.printStackTrace(); + } catch (SAXException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } + return testData; +} - /** write the test data to a QALD2 benchmark XML file, including questions, queries and answers. - * each question needs to have a query but not necessarily an answer. 
- * @param file a QALD benchmark XML file **/ - public void writeQaldXml(final File file) - { - // see http://www.genedavis.com/library/xml/java_dom_xml_creation.jsp - try +/** write the test data to a QALD2 benchmark XML file, including questions, queries and answers. + * each question needs to have a query but not necessarily an answer. + * @param file a QALD benchmark XML file **/ +public void writeQaldXml(final File file) +{ + // see http://www.genedavis.com/library/xml/java_dom_xml_creation.jsp + try + { + DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); + DocumentBuilder db = dbf.newDocumentBuilder(); + Document doc = db.newDocument(); + Element root = doc.createElement("dataset"); + doc.appendChild(root); + + for(Integer i:id2Question.keySet()) { - DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); - DocumentBuilder db = dbf.newDocumentBuilder(); - Document doc = db.newDocument(); - Element root = doc.createElement("dataset"); - doc.appendChild(root); - - for(Integer i:id2Question.keySet()) + Element questionElement = doc.createElement("question"); + questionElement.setAttribute("id", i.toString()); + questionElement.setAttribute("answertype", "resource"); + root.appendChild(questionElement); + Element stringElement = doc.createElement("string"); + stringElement.setTextContent(id2Question.get(i)); + questionElement.appendChild(stringElement); + String query = id2Query.get(i); + if(query!=null) + { + Element queryElement = doc.createElement("query"); + // queryElement.setTextContent(query); + queryElement.appendChild(doc.createCDATASection(query)); + questionElement.appendChild(queryElement); + } + Collection<String> answers = id2Answers.get(i); + if(answers!=null) { - Element questionElement = doc.createElement("question"); - questionElement.setAttribute("id", i.toString()); - questionElement.setAttribute("answertype", "resource"); - root.appendChild(questionElement); - Element stringElement = doc.createElement("string"); - 
stringElement.setTextContent(id2Question.get(i)); - questionElement.appendChild(stringElement); - String query = id2Query.get(i); - if(query!=null) - { - Element queryElement = doc.createElement("query"); -// queryElement.setTextContent(query); - queryElement.appendChild(doc.createCDATASection(query)); - questionElement.appendChild(queryElement); - } - Collection<String> answers = id2Answers.get(i); - if(answers!=null) + for(String answer: answers) { - for(String answer: answers) - { - Element answerElement = doc.createElement("answer"); - answerElement.setTextContent(answer); - questionElement.appendChild(answerElement); - } + Element answerElement = doc.createElement("answer"); + answerElement.setTextContent(answer); + questionElement.appendChild(answerElement); } - } - //set up a transformer - TransformerFactory transfac = TransformerFactory.newInstance(); - Transformer trans = transfac.newTransformer(); - trans.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes"); - trans.setOutputProperty(OutputKeys.INDENT, "yes"); + } + } + //set up a transformer + TransformerFactory transfac = TransformerFactory.newInstance(); + Transformer trans = transfac.newTransformer(); + trans.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes"); + trans.setOutputProperty(OutputKeys.INDENT, "yes"); - - //create string from xml tree - PrintWriter sw = new PrintWriter(file); - StreamResult result = new StreamResult(sw); - DOMSource source = new DOMSource(doc); - trans.transform(source, result); - } - catch (Exception e) {throw new RuntimeException(e);} + + //create string from xml tree + PrintWriter sw = new PrintWriter(file); + StreamResult result = new StreamResult(sw); + DOMSource source = new DOMSource(doc); + trans.transform(source, result); } + catch (Exception e) {throw new RuntimeException(e);} +} } \ No newline at end of file Modified: branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 
=================================================================== --- branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-10-30 16:10:36 UTC (rev 3861) +++ branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-10-30 16:32:40 UTC (rev 3862) @@ -4,6 +4,7 @@ import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; +import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStreamReader; @@ -12,9 +13,12 @@ import java.io.PrintWriter; import java.io.Serializable; import java.io.UnsupportedEncodingException; +import java.net.HttpURLConnection; import java.net.MalformedURLException; +import java.net.URL; import java.net.URLDecoder; import java.text.DateFormat; +import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.Date; @@ -36,6 +40,7 @@ import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; +import java.util.regex.Pattern; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; @@ -44,6 +49,9 @@ import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; +import org.apache.commons.collections15.BidiMap; +import org.apache.commons.collections15.bidimap.DualHashBidiMap; +import org.apache.commons.lang.StringEscapeUtils; import org.apache.log4j.FileAppender; import org.apache.log4j.Level; import org.apache.log4j.Logger; @@ -69,7 +77,6 @@ import org.w3c.dom.Element; import org.w3c.dom.NodeList; import org.xml.sax.SAXException; -import cern.colt.Arrays; import com.hp.hpl.jena.query.QueryExecutionFactory; import com.hp.hpl.jena.query.QueryFactory; import 
com.hp.hpl.jena.query.QuerySolution; @@ -99,25 +106,150 @@ // problem mit "In/IN which/WDT films/NNS did/VBD Julia/NNP Roberts/NNP as/RB well/RB as/IN Richard/NNP Gere/NNP play/NN" public class SPARQLTemplateBasedLearner3Test -{ +{ + private static final boolean USE_HMM = false; private static final File evaluationFolder = new File("cache/evaluation"); private static final boolean DBPEDIA_PRETAGGED = true; private static final boolean OXFORD_PRETAGGED = false; - private static final int MAX_NUMBER_OF_QUESTIONS = 10; + private static final int MAX_NUMBER_OF_QUESTIONS = 20; + private static final boolean WHITELIST_ONLY = false; + private static final Set<Integer> WHITELIST = Collections.unmodifiableSet(new HashSet<Integer>(Arrays.asList(new Integer[] {4}))); - @Test public void testDBpedia() throws Exception + /*@Test*/ public void testDBpedia() throws Exception { File file = generateTestDataIfNecessary( new File(getClass().getClassLoader().getResource("tbsl/evaluation/qald2-dbpedia-train-tagged(ideal).xml").getFile()), SparqlEndpoint.getEndpointDBpedia(), dbpediaLiveCache); - test("QALD 2 Benchmark ideally tagged", file,SparqlEndpoint.getEndpointDBpedia(),dbpediaLiveCache,dbpediaLiveKnowledgebase,null,null); + test("QALD 2 Benchmark ideally tagged", file,SparqlEndpoint.getEndpointDBpedia(),dbpediaLiveCache,dbpediaLiveKnowledgebase,null,null,DBPEDIA_PRETAGGED); } - /*@Test*/ public void testOxford() throws Exception + // private char[] hmmHtmlRow(String question, String string, String string2, String string3, Set<String> set, Set<String> set2, + // Set<String> set3, LearnStatus learnStatus, LearnStatus learnStatus2) + // { + // return null; + // } + + private static boolean probablySparqlSelectQuery(String s) { + s=s.toLowerCase(); + return s.contains("select")&&s.contains("{")&&s.contains("}"); + } + + /** returns an html table row representation <tr><td>t(o_1)</td>...<td>t(o_n)</td></tr> of the string representation of objects, + * transformed by escaping HTML 
characters, setting fixed width on SPARQL queries and shortening and linking of dbpedia resource URIs. + */ + // unescaped form from the top: <tr><td>t(o_1)</td>...<td>t(o_n)</td></tr> + private static String htmlTableTr(Object... tds) + { + System.out.println(); + StringBuilder sb = new StringBuilder(); + // shorten and link dbpedia resource uris + Pattern p = Pattern.compile("http://dbpedia\\.org/resource/([\\w]*)"); + + for(Object td: tds) + { + if(td==null) {sb.append("<td></td>");continue;} + sb.append("<td>"); + + // probably a SPARQL query? use fixed font width. + String s = StringEscapeUtils.escapeHtml(td.toString()); + if(probablySparqlSelectQuery(s)) {s="<pre>"+s+"</pre>";} + else {s =(p.matcher(s).replaceAll("<a href=\"$0\">dbpedia:$1</a>"));} + + sb.append(s); + sb.append("</td>"); + } + return sb.toString(); + } + + @SuppressWarnings("unchecked") /*@Test*/ public void evaluateHMMAgainstNormalAndBenchmark() throws FileNotFoundException + { + // get the newest evaluations from both with and without hmm + SortedMap<Long,Evaluation> evaluations = new TreeMap<Long,Evaluation>(Collections.reverseOrder()); + evaluations.putAll(Evaluation.read()); + Evaluation newestWithHmm = null; + Evaluation newestWithoutHmm = null; + + for(Iterator<Long> it = evaluations.keySet().iterator();it.hasNext()&&(newestWithHmm==null||newestWithoutHmm==null);) + { + Evaluation e = evaluations.get(it.next()); + if(e.testData.hmm) + {if(newestWithHmm==null) {newestWithHmm=e;}} + else if(newestWithoutHmm==null) {newestWithoutHmm=e;} + } + if(newestWithHmm==null||newestWithoutHmm==null) {logger.warn("No pair of evaluations for Aborting.");return;} + + Set<String> intersectionOfQuestions = new HashSet<String>(newestWithHmm.testData.id2Question.values()); + intersectionOfQuestions.retainAll(newestWithoutHmm.testData.id2Question.values()); + if(intersectionOfQuestions.isEmpty()) {logger.warn("No common questions. 
Aborting.");return;} + + Set<String> questionsOnlyCorrectWithHMM = new HashSet<String> (intersectionOfQuestions); + questionsOnlyCorrectWithHMM.retainAll(newestWithHmm.correctlyAnsweredQuestions); + questionsOnlyCorrectWithHMM.removeAll(newestWithoutHmm.correctlyAnsweredQuestions); + + Set<String> questionsOnlyCorrectWithoutHMM = new HashSet<String> (intersectionOfQuestions); + questionsOnlyCorrectWithoutHMM.retainAll(newestWithoutHmm.correctlyAnsweredQuestions); + questionsOnlyCorrectWithoutHMM.removeAll(newestWithHmm.correctlyAnsweredQuestions); + + PrintWriter out = new PrintWriter("log/evaluatehmm.html"); + String title = "Evaluation of HMM vs the normal disambiguation."; + + out.println("<!DOCTYPE html><html>\n<head><title>"+title+"</title></head>\n<body>\n<table border='1'>"); + out.println("<tr><th>Question</th><th>Query with HMM</th><th>Query without HMM</th><th>Reference Query</th>" + + "<th>Answers with HMM</th><th>Answers without HMM</th><th>Reference Answers</th><th>Status with HMM</th><th>Status without HMM</th></tr>"); + + // most of the time it should be enough to assume that the keys are equal, but this could introduce subtle bugs + BidiMap<String,Integer> question2IdWithHmm = new DualHashBidiMap<Integer,String>(newestWithHmm.testData.id2Question).inverseBidiMap(); + BidiMap<String,Integer> question2IdWithoutHmm = new DualHashBidiMap<Integer,String>(newestWithoutHmm.testData.id2Question).inverseBidiMap(); + // if(newestWithHmm.correctlyAnsweredQuestions.contains(question)!=newestWithoutHmm.correctlyAnsweredQuestions.contains(question)) {..} + + for(Set<String> c : new Set[] {questionsOnlyCorrectWithHMM, questionsOnlyCorrectWithoutHMM}) + { + for(String question: c) + { + int idWithHmm = question2IdWithHmm.get(question); + int idWithoutHmm = question2IdWithoutHmm.get(question); + out.println(htmlTableTr( + question, + newestWithHmm.testData.id2Query.get(idWithHmm), + newestWithoutHmm.testData.id2Query.get(idWithoutHmm), + 
newestWithHmm.referenceData.id2Query.get(idWithHmm), + newestWithHmm.testData.id2Answers.get(idWithHmm), + newestWithoutHmm.testData.id2Answers.get(idWithoutHmm), + newestWithHmm.referenceData.id2Answers.get(idWithHmm), + newestWithHmm.testData.id2LearnStatus.get(idWithHmm), + newestWithoutHmm.testData.id2LearnStatus.get(idWithoutHmm) + )); + } + } + + // Integer id = question2Id.get(question); + // if(evaluation.testData.id2Answers.get(id)==null) {System.err.println(question);continue;} + // out.println( + // "<tr><td>"+question+"</td>"+ + // "<td><code><pre>"+escapePre(evaluation.testData.id2Query.get(id))+"</pre></code></td>"+ + // "<td><code><pre>"+escapePre(evaluation.referenceData.id2Query.get(id))+"</pre></code></td>"+ + // "<td><ul>"+getAnswerHTMLList(evaluation.testData.id2Answers.get(id).toArray(new String[0]))+"</ul></td>"+ + // "<td><ul>"+getAnswerHTMLList(evaluation.referenceData.id2Answers.get(id).toArray(new String[0]))+"</ul></td>"+ + // "<td>"+evaluation.testData.id2LearnStatus.get(id)+"</td></tr>"); + + + + + + logger.info(questionsOnlyCorrectWithHMM.size()+" questions only correct with hmm, "+ + questionsOnlyCorrectWithoutHMM.size()+" questions only correct without hmm"); + + // generate a html description of it + + out.close(); + } + + @Test public void testOxford() throws Exception + { File file = new File(getClass().getClassLoader().getResource("tbsl/evaluation/oxford_working_questions.xml").getFile()); - test("Oxford 19 working questions", file,null,null,null,loadOxfordModel(),getOxfordMappingIndex()); + test("Oxford 19 working questions", file,null,null,null,loadOxfordModel(),getOxfordMappingIndex(),OXFORD_PRETAGGED); } // /*@Test*/ public void testOxford() throws Exception @@ -180,7 +312,7 @@ logger.info("learned query: "+testData.id2Query.get(0)); } - /*@Test*/ public void generateXMLOxford() throws IOException + /*@Test*/ @SuppressWarnings("null") public void generateXMLOxford() throws IOException { boolean ADD_POS_TAGS = true; 
PartOfSpeechTagger posTagger = null; @@ -289,10 +421,10 @@ } } - public void test(String title, final File referenceXML,final SparqlEndpoint endpoint,ExtractionDBCache cache,Knowledgebase kb, Model model, MappingBasedIndex index) + public void test(String title, final File referenceXML,final SparqlEndpoint endpoint,ExtractionDBCache cache,Knowledgebase kb, Model model, MappingBasedIndex index,boolean pretagged) throws ParserConfigurationException, SAXException, IOException, TransformerException, ComponentInitException, NoTemplateFoundException { - evaluateAndWrite(title,referenceXML,endpoint,cache,kb,model,index); + evaluateAndWrite(title,referenceXML,endpoint,cache,kb,model,index,pretagged); generateHTML(title); // if(evaluation.numberOfCorrectAnswers<3) {fail("only " + evaluation.numberOfCorrectAnswers+" correct answers.");} @@ -335,14 +467,14 @@ } private void evaluateAndWrite(String title,final File updatedReferenceXML, final SparqlEndpoint endpoint,ExtractionDBCache cache, - Knowledgebase kb, Model model, MappingBasedIndex index) + Knowledgebase kb, Model model, MappingBasedIndex index,boolean pretagged) { - QueryTestData referenceTestData = QueryTestData.readQaldXml(updatedReferenceXML,MAX_NUMBER_OF_QUESTIONS); + QueryTestData referenceTestData = QueryTestData.readQaldXml(updatedReferenceXML,MAX_NUMBER_OF_QUESTIONS,WHITELIST_ONLY,WHITELIST); logger.info(title+" subset loaded with "+referenceTestData.id2Question.size()+" questions."); long startLearning = System.currentTimeMillis(); - QueryTestData learnedTestData = generateTestDataMultiThreaded(referenceTestData.id2Question, kb,model,index,DBPEDIA_PRETAGGED); + QueryTestData learnedTestData = generateTestDataMultiThreaded(referenceTestData.id2Question, kb,model,index,pretagged); long endLearning = System.currentTimeMillis(); logger.info("finished learning after "+(endLearning-startLearning)/1000.0+"s"); learnedTestData.generateAnswers(endpoint,cache,model); @@ -376,6 +508,17 @@ String referenceQuery = 
reference.id2Query.get(i); String suspectQuery = suspect.id2Query.get(i); // reference is required to contain answers for every key so we shouldn't get NPEs here (even though it could be the empty set but that shouldn't happen because only questions with nonempty answers are included in the updated reference) + if(reference.id2Answers.get(i)==null) + { + logger.warn("no reference answers for question "+i+" ("+question+")"); + continue; + } + if(suspect.id2Answers.get(i)==null) + { + logger.warn("no suspect answers for question "+i+" ("+question+")"); + continue; + } + if(referenceQuery.equals(suspectQuery)||reference.id2Answers.get(i).equals(suspect.id2Answers.get(i))) { evaluation.correctlyAnsweredQuestions.add(question); @@ -456,9 +599,12 @@ } catch(IOException e) {throw new RuntimeException(e);} } + /** + * @return the evaluations by timestamp, sorted ascending (from oldest to newest) + */ public static SortedMap<Long,Evaluation> read() { - SortedMap<Long,Evaluation> evaluations = new ConcurrentSkipListMap<Long,Evaluation>(); + SortedMap<Long,Evaluation> evaluations = new ConcurrentSkipListMap<Long,Evaluation>(); evaluationFolder.mkdirs(); File[] files = evaluationFolder.listFiles(); for(int i=0;i<files.length;i++) {evaluations.put(Long.valueOf(files[i].getName()),read(files[i]));} @@ -603,6 +749,7 @@ private QueryTestData generateTestDataMultiThreaded(SortedMap<Integer, String> id2Question,Knowledgebase kb,Model model, MappingBasedIndex index,boolean pretagged) { QueryTestData testData = new QueryTestData(); + testData.hmm = USE_HMM; // -- only create the learner parameters once to save time -- // PartOfSpeechTagger posTagger = new StanfordPartOfSpeechTagger(); // WordNet wordnet = new WordNet(); @@ -792,15 +939,32 @@ // private ResultSet executeDBpediaLiveSelect(String query){return SparqlQuery.convertJSONtoResultSet(dbpediaLiveCache.executeSelectQuery(dbpediaLiveEndpoint, query));} + private static boolean httpResponseOK(String url) throws 
MalformedURLException, IOException + { + HttpURLConnection connection = (HttpURLConnection) new URL(url).openConnection(); + connection.setRequestMethod("HEAD"); + int responseCode = connection.getResponseCode(); + return responseCode == 200; + } private static Knowledgebase createDBpediaLiveKnowledgebase(ExtractionDBCache cache) { - SOLRIndex resourcesIndex = new SOLRIndex("http://dbpedia.aksw.org:8080/solr/dbpedia_resources"); + String resourcesURL = "http://dbpedia.aksw.org:8080/solr/dbpedia_resources123"; + String classesURL = "http://dbpedia.aksw.org:8080/solr/dbpedia_classes"; + String propertiesURL = "http://dbpedia.aksw.org:8080/solr/dbpedia_properties"; + String boaPropertiesURL = "http://139.18.2.173:8080/solr/boa_fact_detail"; + +// for(String url : new String[] {resourcesURL,classesURL,propertiesURL,boaPropertiesURL}) +// { +// try{if(!httpResponseOK(url)) throw new RuntimeException("Http response not 200 for url "+url);} catch(Exception e) {throw new RuntimeException(e);} +// } + + SOLRIndex resourcesIndex = new SOLRIndex(resourcesURL); resourcesIndex.setPrimarySearchField("label"); // resourcesIndex.setSortField("pagerank"); - Index classesIndex = new SOLRIndex("http://dbpedia.aksw.org:8080/solr/dbpedia_classes"); - Index propertiesIndex = new SOLRIndex("http://dbpedia.aksw.org:8080/solr/dbpedia_properties"); - SOLRIndex boa_propertiesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/boa_fact_detail"); + Index classesIndex = new SOLRIndex(classesURL); + Index propertiesIndex = new SOLRIndex(propertiesURL); + SOLRIndex boa_propertiesIndex = new SOLRIndex(boaPropertiesURL); boa_propertiesIndex.setSortField("boa-score"); // propertiesIndex = new HierarchicalIndex(boa_propertiesIndex, propertiesIndex); MappingBasedIndex mappingIndex= new MappingBasedIndex( @@ -820,11 +984,13 @@ Logger.getRootLogger().setLevel(Level.WARN); Logger.getLogger(Templator.class).setLevel(Level.WARN); Logger.getLogger(Parser.class).setLevel(Level.WARN); - 
Logger.getLogger(SPARQLTemplateBasedLearner2.class).setLevel(Level.WARN); + Logger.getLogger(SPARQLTemplateBasedLearner2.class).setLevel(Level.DEBUG); // Logger.getLogger(SPARQLTemplateBasedLearner2.class).setLevel(Level.INFO); logger.setLevel(Level.INFO); // TODO: remove when finishing implementation of this class logger.addAppender(new FileAppender(new SimpleLayout(), "log/"+this.getClass().getSimpleName()+".log", false)); + // Logger.getRootLogger().removeAllAppenders(); + // oxfordEndpoint = new SparqlEndpoint(new URL("http://lgd.aksw.org:8900/sparql"), Collections.singletonList("http://diadem.cs.ox.ac.uk"), Collections.<String>emptyList()); // oxfordLearner = new SPARQLTemplateBasedLearner2(createOxfordKnowledgebase(oxfordCache)); } @@ -846,8 +1012,9 @@ } catch(QueryExceptionHTTP e) { - logger.error("Error getting uris for query "+query+" at endpoint "+endpoint,e); - return Collections.<String>emptySet(); + throw new QueryExceptionHTTP("Error getting uris for query "+query+" at endpoint "+endpoint,e); + // logger.error("Error getting uris for query "+query+" at endpoint "+endpoint,e); + // return Collections.<String>emptySet(); } String variable = "?uri"; resultsetloop: @@ -976,14 +1143,13 @@ @Override public LearnStatus call() { - logger.trace("learning question: "+question); try { // learn query learner.setQuestion(question); - learner.learnSPARQLQueries(); + learner.learnSPARQLQueries(USE_HMM); String learnedQuery = learner.getBestSPARQLQuery(); testData.id2Question.put(id, question); if(learnedQuery!=null&&!learnedQuery.isEmpty()) @@ -995,6 +1161,11 @@ // Set<String> learnedURIs = getUris(DBPEDIA_LIVE_ENDPOINT_URL_STRING,learnedQuery); } + catch(AssertionError e ) + { + // this is the only exception that we want to halt on + throw new RuntimeException(e); + } catch(NoTemplateFoundException e) { logger.warn(String.format("no template found for question \"%s\"",question)); @@ -1070,7 +1241,7 @@ out.println("<!DOCTYPE 
html><html>\n<head><title>"+title+"</title></head>\n<body>\n<table border='1'>"); if(queriesAvailable) { - out.println("<tr><th>Question</th><th>Learned Query</th><th>Reference Query</th><th>Learned Answers</th><th>Reference Answers</th></tr>"); + out.println("<tr><th>Question</th><th>Learned Query</th><th>Reference Query</th><th>Learned Answers</th><th>Reference Answers</th><th>Error Type</th></tr>"); for(String question: questions) { Integer id = question2Id.get(question); @@ -1080,8 +1251,9 @@ "<td><code><pre>"+escapePre(evaluation.testData.id2Query.get(id))+"</pre></code></td>"+ "<td><code><pre>"+escapePre(evaluation.referenceData.id2Query.get(id))+"</pre></code></td>"+ "<td><ul>"+getAnswerHTMLList(evaluation.testData.id2Answers.get(id).toArray(new String[0]))+"</ul></td>"+ - "<td><ul>"+getAnswerHTMLList(evaluation.referenceData.id2Answers.get(id).toArray(new String[0]))+"</ul></td></tr>"); - } + "<td><ul>"+getAnswerHTMLList(evaluation.referenceData.id2Answers.get(id).toArray(new String[0]))+"</ul></td>"+ + "<td>"+evaluation.testData.id2LearnStatus.get(id)+"</td></tr>"); + } } else { out.println("<tr><th>Question</th><th>Error Type</th></tr>"); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ki...@us...> - 2012-11-02 17:47:45
|
Revision: 3867 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3867&view=rev Author: kirdie Date: 2012-11-02 17:47:38 +0000 (Fri, 02 Nov 2012) Log Message: ----------- began bugfixing learner 2. Modified Paths: -------------- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java Modified: branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-11-02 14:52:52 UTC (rev 3866) +++ branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-11-02 17:47:38 UTC (rev 3867) @@ -713,6 +713,7 @@ Set<WeightedQuery> tmp = new TreeSet<WeightedQuery>(); List<Slot> sortedSlots = new ArrayList<Slot>(); Set<Slot> classSlots = new HashSet<Slot>(); + // TODO: can this be optimized to be in just one loop? (but I guess it won't give a noticable performance benefit anyways...) 
for(Slot slot : t.getSlots()){ if(slot.getSlotType() == SlotType.CLASS){ sortedSlots.add(slot); Modified: branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java =================================================================== --- branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-11-02 14:52:52 UTC (rev 3866) +++ branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-11-02 17:47:38 UTC (rev 3867) @@ -117,11 +117,11 @@ private static final File evaluationFolder = new File("cache/evaluation"); private static final boolean DBPEDIA_PRETAGGED = true; private static final boolean OXFORD_PRETAGGED = false; - private static final int MAX_NUMBER_OF_QUESTIONS = Integer.MAX_VALUE; - private static final boolean WHITELIST_ONLY = false; + private static final int MAX_NUMBER_OF_QUESTIONS = 20; + private static final boolean WHITELIST_ONLY = true; private static final Set<Integer> WHITELIST = Collections.unmodifiableSet(new HashSet<Integer>(Arrays.asList(new Integer[] {4}))); - /*@Test*/ public void testDBpedia() throws Exception + @Test public void testDBpedia() throws Exception { File file = generateTestDataIfNecessary( new File(getClass().getClassLoader().getResource("tbsl/evaluation/qald2-dbpedia-train-tagged(ideal).xml").getFile()), @@ -767,7 +767,7 @@ Map<Integer,Future<LearnStatus>> futures = new HashMap<Integer,Future<LearnStatus>>(); // List<FutureTask> todo = new ArrayList<FutureTask>(id2Question.size()); - ExecutorService service = Executors.newFixedThreadPool(1); + ExecutorService service = Executors.newCachedThreadPool();// newFixedThreadPool(1); for(int i: id2Question.keySet()) {//if(i != 78)continue; @@ -1001,7 +1001,7 @@ } } - @Test public void testSolrGoodResults() + /*@Test*/ public void testSolrGoodResults() { Knowledgebase dbpedia = 
createDBpediaLiveKnowledgebase(dbpediaLiveCache); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ki...@us...> - 2012-11-07 18:13:53
|
Revision: 3868 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3868&view=rev Author: kirdie Date: 2012-11-07 18:13:41 +0000 (Wed, 07 Nov 2012) Log Message: ----------- fixed a bug with a comparator used for a treeset with slots and integrated this comparator into the Slot class. Also started a new version of the getWeightedSPARQLQueries(). Modified Paths: -------------- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3.java branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Slot.java branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java Modified: branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-11-02 17:47:38 UTC (rev 3867) +++ branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-11-07 18:13:41 UTC (rev 3868) @@ -15,6 +15,7 @@ import java.util.Map; import java.util.Map.Entry; import java.util.Set; +import java.util.SortedMap; import java.util.SortedSet; import java.util.TreeMap; import java.util.TreeSet; @@ -98,7 +99,7 @@ /** The minimum score of items that are accepted from the Sindice search BOA index. 
**/ private static final Double BOA_THRESHOLD = 0.5; enum Mode {BEST_QUERY, BEST_NON_EMPTY_QUERY} - private Mode mode = Mode.BEST_QUERY; + private Mode mode = Mode.BEST_NON_EMPTY_QUERY; /** used to create a label out of the URI when there is no label available in the SPARQL endpoint.*/ private static SimpleIRIShortFormProvider sfp = new SimpleIRIShortFormProvider(); @@ -521,6 +522,7 @@ private SortedSet<WeightedQuery> getWeightedSPARQLQueries(Set<Template> templates, boolean hmm) { +// return getWeightedSPARQLQueriesNew(templates); return hmm?getWeightedSPARQLQueriesWithHMM(templates):getWeightedSPARQLQueriesWithoutHMM(templates); } @@ -628,14 +630,42 @@ // return queries; } - - @SuppressWarnings("unused") private SortedSet<WeightedQuery> getWeightedSPARQLQueriesWithoutHMM(Set<Template> templates){ - logger.debug("Generating SPARQL query candidates..."); - + + /** removes templates which have empty slots */ + protected Set<Template> goodTemplates(Set<Template> templates) + { + Set<Template> remainingTemplates = new HashSet<Template>(); + templates: + for(Template t: templates) + { + for (Slot slot : t.getSlots()) {if(slot.getWords().isEmpty()) {continue templates;} } + remainingTemplates.add(t); + } + return remainingTemplates; + } + + /** There seems to be a bug in the other getWeightedSPARQLQueries... 
functions, so this is a new implementation + */ + protected SortedSet<WeightedQuery> getWeightedSPARQLQueriesNew(Set<Template> templates) + { + logger.debug("Generating SPARQL query candidates (new implementation)..."); + + List<String> vars = new LinkedList<String>(); + if(templates.isEmpty()) throw new AssertionError("no templates"); + templates = goodTemplates(templates); + if(templates.isEmpty()) throw new AssertionError("no good templates"); + Map<Slot, Set<Allocation>> slot2Allocations = new TreeMap<Slot, Set<Allocation>>(new Comparator<Slot>() { @Override public int compare(Slot o1, Slot o2) { + System.err.println(o1.getToken()); + System.err.println(o2.getToken()); + if(o1.getToken().equalsIgnoreCase("river")||o2.getToken().equalsIgnoreCase("river")) + { + int nop = 5; + System.err.println(nop); + } if(o1.getSlotType() == o2.getSlotType()){ return o1.getToken().compareTo(o2.getToken()); } else { @@ -643,18 +673,59 @@ } } }); - slot2Allocations = Collections.synchronizedMap(new HashMap<Slot, Set<Allocation>>()); +// slot2Allocations = Collections.synchronizedMap(slot2Allocations); + SortedSet<WeightedQuery> allQueries = new TreeSet<WeightedQuery>(); + + SortedSet<WeightedQuery> queries = new TreeSet<WeightedQuery>(); + + for(Template t : templates) + { +// logger.debug("Processing template:\n" + t.toString()); + for (Slot slot : t.getSlots()) + { + // get candidates for slot + if(!slot2Allocations.containsKey(slot)) + { + slot2Allocations.put(slot,new SlotProcessor(slot).computeAllocations(slot)); + } + } + } + logger.info(slot2Allocations.size()+" allocations: "+slot2Allocations); + + if(1==1) System.exit(1); + return queries; + } + @SuppressWarnings("unused") private SortedSet<WeightedQuery> getWeightedSPARQLQueriesWithoutHMM(Set<Template> templates){ + logger.debug("Generating SPARQL query candidates..."); + + SortedMap<Slot, Set<Allocation>> slot2Allocations = new TreeMap<Slot, Set<Allocation>>(); +// new Comparator<Slot>() { +// +// @Override +// public 
int compare(Slot o1, Slot o2) { +// if(o1.equals(o2)) return 0; +// return -1; +//// if(o1.getSlotType() == o2.getSlotType()){ +//// return o1.getToken().compareTo(o2.getToken()); +//// } else { +//// return -1; +//// } +// } +// }); + slot2Allocations = Collections.synchronizedSortedMap(slot2Allocations); + + SortedSet<WeightedQuery> allQueries = new TreeSet<WeightedQuery>(); for(Template t : templates) { - logger.info("Processing template:\n" + t.toString()); + logger.debug("Processing template:\n" + t.toString()); // Set<Allocation> allocations = new TreeSet<Allocation>(); boolean containsRegex = t.getQuery().toString().toLowerCase().contains("(regex("); - ExecutorService executor = Executors.newFixedThreadPool(t.getSlots().size()); + ExecutorService executor = Executors.newSingleThreadExecutor();//Executors.newFixedThreadPool(t.getSlots().size()); List<Future<Map<Slot, SortedSet<Allocation>>>> list = new ArrayList<Future<Map<Slot, SortedSet<Allocation>>>>(); long startTime = System.currentTimeMillis(); @@ -670,7 +741,8 @@ for (Future<Map<Slot, SortedSet<Allocation>>> future : list) { try { Map<Slot, SortedSet<Allocation>> result = future.get(); - Entry<Slot, SortedSet<Allocation>> item = result.entrySet().iterator().next(); + + Entry<Slot, SortedSet<Allocation>> item = result.entrySet().iterator().next(); slot2Allocations.put(item.getKey(), item.getValue()); } catch (InterruptedException e) { e.printStackTrace(); @@ -743,9 +815,22 @@ queries.clear(); queries.addAll(tmp); tmp.clear(); - } + } - for(Slot slot : sortedSlots){ + Set<Slot> unhandledSlots = new HashSet<Slot>(sortedSlots); + unhandledSlots.removeAll(slot2Allocations.keySet()); + if(!unhandledSlots.isEmpty()) + { + logger.error("the following slots are unhandled: "+unhandledSlots); + } + for(Slot slot : sortedSlots) + { + Set<Allocation> allocations = slot2Allocations.get(slot); + if(allocations==null) + { + System.err.println("no allocations for slot "+slot); + + } 
if(!slot2Allocations.get(slot).isEmpty()){ for(Allocation a : slot2Allocations.get(slot)){ for(WeightedQuery query : queries){ @@ -832,9 +917,10 @@ if(!drop){ if(slot.getSlotType() == SlotType.RESOURCE){//avoid queries where predicate is data property and object resource->add REGEX filter in this case + q.replaceVarWithURI(slot.getAnchor(), a.getUri()); for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ SPARQL_Value object = triple.getValue(); -// if(object.isVariable() && object.getName().equals(slot.getAnchor())){//only consider triple where SLOT is in object position + if(object.isVariable() && object.getName().equals(slot.getAnchor())){//only consider triple where SLOT is in object position // SPARQL_Property predicate = triple.getProperty(); // if(!predicate.isVariable()){//only consider triple where predicate is URI // String predicateURI = predicate.getName().replace("<", "").replace(">", ""); @@ -849,7 +935,7 @@ // } // } else { // -// } + } } } else { q.replaceVarWithURI(slot.getAnchor(), a.getUri()); @@ -1306,6 +1392,7 @@ return indexResultItems; } + /** Computes candidates for a slot by using an index. 
* */ class SlotProcessor implements Callable<Map<Slot, SortedSet<Allocation>>>{ private Slot slot; @@ -1322,7 +1409,7 @@ } private SortedSet<Allocation> computeAllocations(Slot slot){ - logger.debug("Computing allocations for slot: " + slot); + logger.trace("Computing allocations for slot: " + slot); SortedSet<Allocation> allocations = new TreeSet<Allocation>(); Index index = getIndexBySlotType(slot); @@ -1378,7 +1465,7 @@ normProminenceValues(allocations); computeScore(allocations); - logger.debug("Found " + allocations.size() + " allocations for slot " + slot); + logger.trace(allocations.size() + " allocations for slot " + slot); return new TreeSet<Allocation>(allocations); } Modified: branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3.java =================================================================== --- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3.java 2012-11-02 17:47:38 UTC (rev 3867) +++ branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3.java 2012-11-07 18:13:41 UTC (rev 3868) @@ -762,7 +762,7 @@ learnedPos++; List<String> results; try { - logger.info("Testing query:\n" + query); + logger.debug("Testing query:\n" + query); com.hp.hpl.jena.query.Query q = QueryFactory.create(query.getQuery().toString(), Syntax.syntaxARQ); q.setLimit(1); ResultSet rs = executeSelect(q.toString()); @@ -805,7 +805,7 @@ } else if(queryType == SPARQL_QueryType.ASK){ for(WeightedQuery query : queries){ learnedPos++; - logger.info("Testing query:\n" + query); + logger.debug("Testing query:\n" + query); boolean result = executeAskQuery(query.getQuery().toString()); learnedSPARQLQueries.add(query); // if(stopIfQueryResultNotEmpty && result){ Modified: branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Slot.java =================================================================== 
--- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Slot.java 2012-11-02 17:47:38 UTC (rev 3867) +++ branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Slot.java 2012-11-07 18:13:41 UTC (rev 3868) @@ -5,7 +5,7 @@ import java.util.Iterator; import java.util.List; -public class Slot implements Serializable{ +public class Slot implements Serializable, Comparable<Slot> { private static final long serialVersionUID = 8672756914248710435L; @@ -164,7 +164,7 @@ if (getClass() != obj.getClass()) return false; Slot other = (Slot) obj; - if(other.type == type && other.token == token){ + if(other.type == type && other.token.equals(token)){ return true; } return false; @@ -178,6 +178,23 @@ result = prime * result + ((type == null) ? 0 : type.hashCode()); return result; } + + @Override + public int compareTo(Slot o) + { + if(this.equals(o)) return 0; + // sort by slot type + int t = type.compareTo(o.type); + if(t!=0) return t; + return token.compareTo(o.token); + // sort by + +// if(o1.getSlotType() == o2.getSlotType()){ +// return o1.getToken().compareTo(o2.getToken()); +// } else { +// return -1; +// } + } } Modified: branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java =================================================================== --- branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-11-02 17:47:38 UTC (rev 3867) +++ branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-11-07 18:13:41 UTC (rev 3868) @@ -1,6 +1,6 @@ package org.dllearner.algorithm.tbsl.learning; -import static org.junit.Assert.*; +import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import java.io.BufferedReader; import java.io.File; @@ -11,7 +11,6 @@ import java.io.InputStreamReader; import java.io.ObjectInputStream; 
import java.io.ObjectOutputStream; -import java.io.OutputStream; import java.io.PrintWriter; import java.io.Serializable; import java.io.UnsupportedEncodingException; @@ -24,6 +23,7 @@ import java.util.Arrays; import java.util.Collection; import java.util.Collections; +import java.util.Comparator; import java.util.Date; import java.util.HashMap; import java.util.HashSet; @@ -31,8 +31,10 @@ import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.Set; import java.util.SortedMap; +import java.util.SortedSet; import java.util.Stack; import java.util.TreeMap; import java.util.TreeSet; @@ -117,9 +119,10 @@ private static final File evaluationFolder = new File("cache/evaluation"); private static final boolean DBPEDIA_PRETAGGED = true; private static final boolean OXFORD_PRETAGGED = false; - private static final int MAX_NUMBER_OF_QUESTIONS = 20; - private static final boolean WHITELIST_ONLY = true; + private static final int MAX_NUMBER_OF_QUESTIONS = Integer.MAX_VALUE; + private static final boolean WHITELIST_ONLY = false; private static final Set<Integer> WHITELIST = Collections.unmodifiableSet(new HashSet<Integer>(Arrays.asList(new Integer[] {4}))); + private static final boolean GENERATE_HTML_ONLY = false; @Test public void testDBpedia() throws Exception { @@ -430,7 +433,7 @@ public void test(String title, final File referenceXML,final SparqlEndpoint endpoint,ExtractionDBCache cache,Knowledgebase kb, Model model, MappingBasedIndex index,boolean pretagged) throws ParserConfigurationException, SAXException, IOException, TransformerException, ComponentInitException, NoTemplateFoundException { - evaluateAndWrite(title,referenceXML,endpoint,cache,kb,model,index,pretagged); + if(!GENERATE_HTML_ONLY) {evaluateAndWrite(title,referenceXML,endpoint,cache,kb,model,index,pretagged);} generateHTML(title); // if(evaluation.numberOfCorrectAnswers<3) {fail("only " + evaluation.numberOfCorrectAnswers+" correct answers.");} 
@@ -529,12 +532,27 @@ { evaluation.correctlyAnsweredQuestions.add(question); evaluation.numberOfCorrectAnswers++; + evaluation.question2JaccardOfAnswers.put(question,1.0); } else { - evaluation.incorrectlyAnsweredQuestions.add(question); - logger.debug("learned queries differing. reference query:\n"+referenceQuery+"\nsuspect query:\n"+suspectQuery); - logger.debug("learned answers differing: reference answers:\n"+reference.id2Answers.get(i)+"\nsuspect answers:\n"+suspect.id2Answers.get(i)); + Set<String> intersection = new HashSet<String>(reference.id2Answers.get(i)); + intersection.retainAll(suspect.id2Answers.get(i)); + if(!intersection.isEmpty()) + { + evaluation.partlyCorrectlyAnsweredQuestions.add(question); + evaluation.numberOfPartlyCorrectAnswers++; + Set<String> union = new HashSet<String>(reference.id2Answers.get(i)); + union.addAll(suspect.id2Answers.get(i)); + evaluation.question2JaccardOfAnswers.put(question,((double)intersection.size())/union.size()); + } else + { + evaluation.incorrectlyAnsweredQuestions.add(question); + evaluation.question2JaccardOfAnswers.put(question,0.0); + logger.debug("learned queries differing. 
reference query:\n"+referenceQuery+"\nsuspect query:\n"+suspectQuery); + logger.debug("learned answers differing: reference answers:\n"+reference.id2Answers.get(i)+"\nsuspect answers:\n"+suspect.id2Answers.get(i)); + } + } } return evaluation; @@ -542,18 +560,22 @@ static class Evaluation implements Serializable { - private static final long serialVersionUID = 5L; + private static final long serialVersionUID = 6L; final QueryTestData testData; final QueryTestData referenceData; int numberOfQuestions = 0; - int numberOfAnsweredQuestions = 0; + int numberOfAnsweredQuestions = 0; int numberOfCorrectAnswers = 0; + int numberOfPartlyCorrectAnswers = 0; double precision = 0; double recall = 0; final Set<String> unansweredQuestions = new HashSet<String>(); final Set<String> incorrectlyAnsweredQuestions = new HashSet<String>(); - final Set<String> correctlyAnsweredQuestions = new HashSet<String>(); + final Set<String> correctlyAnsweredQuestions = new HashSet<String>(); + final Set<String> partlyCorrectlyAnsweredQuestions = new HashSet<String>(); + final Map<String,Double> question2JaccardOfAnswers = new HashMap<String,Double>(); + public Evaluation(QueryTestData testData,QueryTestData referenceData) {this.testData = testData;this.referenceData = referenceData;} void computePrecisionAndRecall() // we have at maximum one answer set per question @@ -566,7 +588,7 @@ { StringBuffer sb = new StringBuffer(); sb.append(numberOfAnsweredQuestions+" of "+numberOfQuestions+" questions answered, "); - sb.append(numberOfCorrectAnswers+" correct answers."); + sb.append(numberOfCorrectAnswers+" exactly correct answers, "+numberOfPartlyCorrectAnswers+" partly correct answers."); sb.append("precision: "+precision+", recall: "+recall+"\n"); sb.append("Detailed List: "); sb.append(toHTML()); @@ -578,6 +600,7 @@ StringBuffer sb = new StringBuffer(); sb.append(htmlDetailsList("Unanswered Questions",unansweredQuestions)); sb.append(htmlDetailsList("Wrongly Answered 
Questions",incorrectlyAnsweredQuestions)); + sb.append(htmlDetailsList("Partly correctly Answered Questions",partlyCorrectlyAnsweredQuestions)); sb.append(htmlDetailsList("Correctly Answered Questions",correctlyAnsweredQuestions)); return sb.toString(); } @@ -640,6 +663,13 @@ if (other.correctlyAnsweredQuestions != null) return false; } else if (!correctlyAnsweredQuestions.equals(other.correctlyAnsweredQuestions)) return false; + + if (partlyCorrectlyAnsweredQuestions == null) + { + if (other.partlyCorrectlyAnsweredQuestions != null) return false; + } + else if (!partlyCorrectlyAnsweredQuestions.equals(other.partlyCorrectlyAnsweredQuestions)) return false; + if (incorrectlyAnsweredQuestions == null) { if (other.incorrectlyAnsweredQuestions != null) return false; @@ -1000,14 +1030,14 @@ assertTrue(entities[i][1]+"!="+uri+" "+items,entities[i][1].equals(uri)||entities[i][1].equals(secondUri)); } } - + /*@Test*/ public void testSolrGoodResults() { Knowledgebase dbpedia = createDBpediaLiveKnowledgebase(dbpediaLiveCache); - + testIndex(dbpedia.getResourceIndex(),new String[][] {{"Brooklyn Bridge","http://dbpedia.org/resource/Brooklyn_Bridge"},{"Estonia","http://dbpedia.org/resource/Estonia"}, - {"Germany","http://dbpedia.org/resource/Germany"}}); + {"Germany","http://dbpedia.org/resource/Germany"}}); testIndex(dbpedia.getPropertyIndex(),new String[][] {{"born in","http://dbpedia.org/ontology/birthPlace"}}); } @@ -1282,16 +1312,38 @@ return sbAnswers.toString(); } - /** Generates the HTML string content for one of the 3 colored bars which represent the correctly, incorrectly and unanswered question. + static <K,V extends Comparable<? super V>> SortedSet<Map.Entry<K,V>> entriesSortedByValues(Map<K,V> map) { + SortedSet<Map.Entry<K,V>> sortedEntries = new TreeSet<Map.Entry<K,V>>( + new Comparator<Map.Entry<K,V>>() { + @Override public int compare(Map.Entry<K,V> e1, Map.Entry<K,V> e2) { + int res = e1.getValue().compareTo(e2.getValue()); + return res != 0 ? 
res : 1; // Special fix to preserve items with equal values + } + } + ); + sortedEntries.addAll(map.entrySet()); + return sortedEntries; + } + + /** Generates the HTML string content for one of the 4 colored bars which represent the correctly, incorrectly and unanswered question. * Also creates and links to a file which contains the questions.*/ - private static String createColoredColumn(/*@NonNull*/ File link,/*@NonNull*/ String title,/*@NonNull*/ String color,/*@NonNull*/ Collection<String> questions, int numberOfQuestionsTotal, boolean queriesAvailable, Evaluation evaluation) + private static String createColoredColumn(/*@NonNull*/ File link,/*@NonNull*/ String title,/*@NonNull*/ String color,/*@NonNull*/ Collection<String> questions, int numberOfQuestionsTotal, boolean queriesAvailable,boolean jaccard, Evaluation evaluation) { final StringBuilder sb = new StringBuilder(); - sb.append("<a href='"+link.getAbsolutePath()+"' title='"+title+"'>"); + sb.append("<a href='"+link.getAbsolutePath()+"' title='"+title+" ("+questions.size()+"/"+(numberOfQuestionsTotal==0?"":numberOfQuestionsTotal)+")'>"); sb.append("<div style='float:left;width:"+100.0*questions.size()/numberOfQuestionsTotal+"%;height:1em;background-color:"+color+";'></div>"); sb.append("</a>"); + // link.getParentFile().mkdirs(); + Collection<String> sortedQuestions; + if(jaccard) // sort by jaccard descending + { + sortedQuestions = new LinkedList<String>(); + SortedMap<String,Double> map = new TreeMap<String,Double>(); + for(String question : questions) {map.put(question, 1-evaluation.question2JaccardOfAnswers.get(question));} - // link.getParentFile().mkdirs(); + for(Entry<String,Double> e: entriesSortedByValues(map)) {sortedQuestions.add(e.getKey());} + } else sortedQuestions = questions; + try { PrintWriter out = new PrintWriter(link); @@ -1301,8 +1353,9 @@ out.println("<!DOCTYPE html><html>\n<head><title>"+title+"</title></head>\n<body>\n<table border='1'>"); if(queriesAvailable) { - 
out.println("<tr><th>Question</th><th>Learned Query</th><th>Reference Query</th><th>Learned Answers</th><th>Reference Answers</th><th>Error Type</th></tr>"); - for(String question: questions) + out.println("<tr><th>Question</th><th>Learned Query</th><th>Reference Query</th><th>Learned Answers</th><th>Reference Answers</th><th>Error Type</th>"+ + (jaccard?"<th>jaccard</th>":"")+"</tr>"); + for(String question: sortedQuestions) { Integer id = question2Id.get(question); if(evaluation.testData.id2Answers.get(id)==null) {System.err.println(question);continue;} @@ -1312,12 +1365,13 @@ "<td><code><pre>"+escapePre(evaluation.referenceData.id2Query.get(id))+"</pre></code></td>"+ "<td><ul>"+getAnswerHTMLList(evaluation.testData.id2Answers.get(id).toArray(new String[0]))+"</ul></td>"+ "<td><ul>"+getAnswerHTMLList(evaluation.referenceData.id2Answers.get(id).toArray(new String[0]))+"</ul></td>"+ - "<td>"+evaluation.testData.id2LearnStatus.get(id)+"</td></tr>"); + "<td>"+evaluation.testData.id2LearnStatus.get(id)+"</td>"+ + "<td>"+(jaccard?evaluation.question2JaccardOfAnswers.get(question):"")+"</td></tr>"); } } else - { + { out.println("<tr><th>Question</th><th>Error Type</th></tr>"); - for(String question: questions) + for(String question: sortedQuestions) { Integer id = question2Id.get(question); if(id==null) {System.err.println(question);continue;} @@ -1350,6 +1404,7 @@ out.println("</style></head>"); out.println("<body>"); out.println(diffHTML("Correctly Answered Questions (precision and recall = 1)", from.correctlyAnsweredQuestions, to.correctlyAnsweredQuestions)); + out.println(diffHTML("Partly correctly Answered Questions", from.partlyCorrectlyAnsweredQuestions, to.partlyCorrectlyAnsweredQuestions)); out.println(diffHTML("Incorrectly Answered Questions", from.incorrectlyAnsweredQuestions, to.incorrectlyAnsweredQuestions)); out.println(diffHTML("Unanswered Questions", from.unansweredQuestions, to.unansweredQuestions)); out.println("</body>\n</html>"); @@ -1390,9 +1445,10 
@@ } sb2.append("</td><td width='100%'>"); sb2.append("<div style='width:100%;height:1em;border:solid 1px;'>"); - sb2.append(createColoredColumn(new File(folder,"correctly_answered.html"), "Correctly Answered Questions", "green", e.correctlyAnsweredQuestions, e.numberOfQuestions,true,e)); - sb2.append(createColoredColumn(new File(folder,"incorrectly_answered.html"), "Incorrectly Answered Questions", "orange", e.incorrectlyAnsweredQuestions, e.numberOfQuestions,true,e)); - sb2.append(createColoredColumn(new File(folder,"unanswered.html"), "Unanswered Questions", "red", e.unansweredQuestions, e.numberOfQuestions,false,e)); + sb2.append(createColoredColumn(new File(folder,"correctly_answered.html"), "Correctly Answered Questions", "green", e.correctlyAnsweredQuestions, e.numberOfQuestions,true,false,e)); + sb2.append(createColoredColumn(new File(folder,"partly_correctly_answered.html"), "Partly Correctly Answered Questions", "gold", e.partlyCorrectlyAnsweredQuestions, e.numberOfQuestions,true,true,e)); + sb2.append(createColoredColumn(new File(folder,"incorrectly_answered.html"), "Incorrectly Answered Questions", "darkorange", e.incorrectlyAnsweredQuestions, e.numberOfQuestions,true,false,e)); + sb2.append(createColoredColumn(new File(folder,"unanswered.html"), "Unanswered Questions", "red", e.unansweredQuestions, e.numberOfQuestions,false,false,e)); sb2.append("<span style='width:1000px;'></span>"); sb2.append("</td></tr>\n"); last = e; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ki...@us...> - 2012-11-08 18:25:55
|
Revision: 3869 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3869&view=rev Author: kirdie Date: 2012-11-08 18:25:48 +0000 (Thu, 08 Nov 2012) Log Message: ----------- improvement of new learner. Modified Paths: -------------- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java Modified: branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-11-07 18:13:41 UTC (rev 3868) +++ branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-11-08 18:25:48 UTC (rev 3869) @@ -76,6 +76,7 @@ import org.dllearner.kb.sparql.SparqlQuery; import org.dllearner.reasoning.SPARQLReasoner; import org.ini4j.Options; +import org.openjena.atlas.logging.Log; import org.semanticweb.owlapi.model.IRI; import org.semanticweb.owlapi.util.SimpleIRIShortFormProvider; import com.hp.hpl.jena.ontology.OntModelSpec; @@ -654,27 +655,9 @@ if(templates.isEmpty()) throw new AssertionError("no templates"); templates = goodTemplates(templates); if(templates.isEmpty()) throw new AssertionError("no good templates"); - - Map<Slot, Set<Allocation>> slot2Allocations = new TreeMap<Slot, Set<Allocation>>(new Comparator<Slot>() { + logger.debug(templates.size()+" good templates found."); + Map<Slot, Set<Allocation>> slot2Allocations = Collections.synchronizedSortedMap(new TreeMap<Slot, Set<Allocation>>()); - @Override - public int compare(Slot o1, Slot o2) { - System.err.println(o1.getToken()); - System.err.println(o2.getToken()); - if(o1.getToken().equalsIgnoreCase("river")||o2.getToken().equalsIgnoreCase("river")) - { - int nop = 
5; - System.err.println(nop); - } - if(o1.getSlotType() == o2.getSlotType()){ - return o1.getToken().compareTo(o2.getToken()); - } else { - return -1; - } - } - }); -// slot2Allocations = Collections.synchronizedMap(slot2Allocations); - SortedSet<WeightedQuery> allQueries = new TreeSet<WeightedQuery>(); SortedSet<WeightedQuery> queries = new TreeSet<WeightedQuery>(); @@ -725,31 +708,25 @@ // Set<Allocation> allocations = new TreeSet<Allocation>(); boolean containsRegex = t.getQuery().toString().toLowerCase().contains("(regex("); - ExecutorService executor = Executors.newSingleThreadExecutor();//Executors.newFixedThreadPool(t.getSlots().size()); - List<Future<Map<Slot, SortedSet<Allocation>>>> list = new ArrayList<Future<Map<Slot, SortedSet<Allocation>>>>(); + ExecutorService executor = Executors.newSingleThreadExecutor();//Executors.newFixedThreadPool(t.getSlots().size()); long startTime = System.currentTimeMillis(); - + Map<Future,Slot> futureToSlot = new HashMap<Future,Slot>(); + for (Slot slot : t.getSlots()) { if(!slot2Allocations.containsKey(slot)){ - Callable<Map<Slot, SortedSet<Allocation>>> worker = new SlotProcessor(slot); - Future<Map<Slot, SortedSet<Allocation>>> submit = executor.submit(worker); - list.add(submit); + Callable<SortedSet<Allocation>> worker = new SlotProcessor(slot); + Future<SortedSet<Allocation>> submit = executor.submit(worker); + futureToSlot.put(submit, slot); } } - for (Future<Map<Slot, SortedSet<Allocation>>> future : list) { + for (Future<SortedSet<Allocation>> future : futureToSlot.keySet()) + { try { - Map<Slot, SortedSet<Allocation>> result = future.get(); - - Entry<Slot, SortedSet<Allocation>> item = result.entrySet().iterator().next(); - slot2Allocations.put(item.getKey(), item.getValue()); - } catch (InterruptedException e) { - e.printStackTrace(); - } catch (ExecutionException e) { -// e.printStackTrace(); - throw new RuntimeException(e); - } + SortedSet<Allocation> result = future.get(); + 
slot2Allocations.put(futureToSlot.get(future), result); + } catch (InterruptedException e) {e.printStackTrace();} catch (ExecutionException e) {e.printStackTrace();throw new RuntimeException(e);} } executor.shutdown(); @@ -1393,19 +1370,18 @@ } /** Computes candidates for a slot by using an index. * */ - class SlotProcessor implements Callable<Map<Slot, SortedSet<Allocation>>>{ + class SlotProcessor implements Callable<SortedSet<Allocation>> + { + public final Slot slot; - private Slot slot; - public SlotProcessor(Slot slot) { this.slot = slot; } @Override - public Map<Slot, SortedSet<Allocation>> call() throws Exception { - Map<Slot, SortedSet<Allocation>> result = new HashMap<Slot, SortedSet<Allocation>>(); - result.put(slot, computeAllocations(slot)); - return result; + public SortedSet<Allocation> call() throws Exception + { + return computeAllocations(slot); } private SortedSet<Allocation> computeAllocations(Slot slot){ Modified: branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java =================================================================== --- branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-11-07 18:13:41 UTC (rev 3868) +++ branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-11-08 18:25:48 UTC (rev 3869) @@ -115,31 +115,32 @@ // problem mit "In/IN which/WDT films/NNS did/VBD Julia/NNP Roberts/NNP as/RB well/RB as/IN Richard/NNP Gere/NNP play/NN" public class SPARQLTemplateBasedLearner3Test { - private static final boolean USE_HMM = false; - private static final File evaluationFolder = new File("cache/evaluation"); - private static final boolean DBPEDIA_PRETAGGED = true; - private static final boolean OXFORD_PRETAGGED = false; - private static final int MAX_NUMBER_OF_QUESTIONS = Integer.MAX_VALUE; - private static final boolean WHITELIST_ONLY = 
false; - private static final Set<Integer> WHITELIST = Collections.unmodifiableSet(new HashSet<Integer>(Arrays.asList(new Integer[] {4}))); - private static final boolean GENERATE_HTML_ONLY = false; + protected static final boolean USE_HMM = false; + protected static final File evaluationFolder = new File("cache/evaluation"); + protected static final boolean DBPEDIA_PRETAGGED = true; + protected static final boolean OXFORD_PRETAGGED = false; + protected static final int MAX_NUMBER_OF_QUESTIONS = Integer.MAX_VALUE; + protected static final boolean WHITELIST_ONLY = false; + protected static final Set<Integer> WHITELIST = Collections.unmodifiableSet(new HashSet<Integer>(Arrays.asList(new Integer[] {4}))); + protected static final boolean GENERATE_HTML_ONLY = false; + protected static final int MAX_THREADS = 10; @Test public void testDBpedia() throws Exception { File file = generateTestDataIfNecessary( new File(getClass().getClassLoader().getResource("tbsl/evaluation/qald2-dbpedia-train-tagged(ideal).xml").getFile()), - SparqlEndpoint.getEndpointDBpedia(), + SparqlEndpoint.getEndpointDBpediaLiveAKSW(), dbpediaLiveCache); test("QALD 2 Benchmark ideally tagged", file,SparqlEndpoint.getEndpointDBpedia(),dbpediaLiveCache,dbpediaLiveKnowledgebase,null,null,DBPEDIA_PRETAGGED); } - // private char[] hmmHtmlRow(String question, String string, String string2, String string3, Set<String> set, Set<String> set2, + // protected char[] hmmHtmlRow(String question, String string, String string2, String string3, Set<String> set, Set<String> set2, // Set<String> set3, LearnStatus learnStatus, LearnStatus learnStatus2) // { // return null; // } - private static boolean probablySparqlSelectQuery(String s) + protected static boolean probablySparqlSelectQuery(String s) { s=s.toLowerCase(); return s.contains("select")&&s.contains("{")&&s.contains("}"); @@ -149,7 +150,7 @@ * transformed by escaping HTML characters, setting fixed width on SPARQL queries and shortening and linking of dbpedia 
resource URIs. */ // unescaped form from the top: <tr><td>t(o_1)</td>...<td>t(o_n)</td></tr> - private static String htmlTableTr(Object... tds) + protected static String htmlTableTr(Object... tds) { System.out.println(); StringBuilder sb = new StringBuilder(); @@ -461,7 +462,7 @@ learnedTestData.write();*/ } - private File generateTestDataIfNecessary(final File referenceXML,final SparqlEndpoint endpoint,ExtractionDBCache cache) throws ParserConfigurationException, SAXException, IOException, TransformerException + protected File generateTestDataIfNecessary(final File referenceXML,final SparqlEndpoint endpoint,ExtractionDBCache cache) throws ParserConfigurationException, SAXException, IOException, TransformerException { String dir = "cache/"+getClass().getSimpleName()+"/"; new File(dir).mkdirs(); @@ -475,7 +476,7 @@ return updatedReferenceXML; } - private void evaluateAndWrite(String title,final File updatedReferenceXML, final SparqlEndpoint endpoint,ExtractionDBCache cache, + protected void evaluateAndWrite(String title,final File updatedReferenceXML, final SparqlEndpoint endpoint,ExtractionDBCache cache, Knowledgebase kb, Model model, MappingBasedIndex index,boolean pretagged) { @@ -498,7 +499,7 @@ * @param reference the test data assumed to be correct. needs to contain the answers for all queries. * @param suspect the test data to compare with the reference. 
* if a query for a question does not match and the answers are not provided or don't match as well then the question is marked as incorrectly answered.*/ - private static Evaluation evaluate(QueryTestData reference, QueryTestData suspect) + protected static Evaluation evaluate(QueryTestData reference, QueryTestData suspect) { // Diff d = diffTestData(reference,testData); Evaluation evaluation = new Evaluation(suspect,reference); @@ -560,7 +561,7 @@ static class Evaluation implements Serializable { - private static final long serialVersionUID = 6L; + protected static final long serialVersionUID = 6L; final QueryTestData testData; final QueryTestData referenceData; int numberOfQuestions = 0; @@ -640,7 +641,7 @@ return evaluations; } - private static Evaluation read(File file) + protected static Evaluation read(File file) { try { @@ -739,7 +740,7 @@ public final Type type; - private static final long serialVersionUID = 1L; + protected static final long serialVersionUID = 1L; public static final LearnStatus OK = new LearnStatus(Type.OK,null); public static final LearnStatus TIMEOUT = new LearnStatus(Type.TIMEOUT,null); public static final LearnStatus NO_TEMPLATE_FOUND = new LearnStatus(Type.NO_TEMPLATE_FOUND,null); @@ -748,7 +749,7 @@ public final Exception exception; - private LearnStatus(Type type, Exception exception) {this.type=type;this.exception = exception;} + protected LearnStatus(Type type, Exception exception) {this.type=type;this.exception = exception;} public static LearnStatus exceptionStatus(Exception cause) { @@ -782,7 +783,7 @@ * @throws MalformedURLException * @throws ComponentInitException */ - private QueryTestData generateTestDataMultiThreaded(SortedMap<Integer, String> id2Question,Knowledgebase kb,Model model, MappingBasedIndex index,boolean pretagged) + protected QueryTestData generateTestDataMultiThreaded(SortedMap<Integer, String> id2Question,Knowledgebase kb,Model model, MappingBasedIndex index,boolean pretagged) { QueryTestData testData = new 
QueryTestData(); testData.hmm = USE_HMM; @@ -797,7 +798,7 @@ Map<Integer,Future<LearnStatus>> futures = new HashMap<Integer,Future<LearnStatus>>(); // List<FutureTask> todo = new ArrayList<FutureTask>(id2Question.size()); - ExecutorService service = Executors.newCachedThreadPool();// newFixedThreadPool(1); + ExecutorService service = Executors.newFixedThreadPool(MAX_THREADS); for(int i: id2Question.keySet()) {//if(i != 78)continue; @@ -809,7 +810,7 @@ String question = id2Question.get(i); try { - testData.id2LearnStatus.put(i,futures.get(i).get(30, TimeUnit.MINUTES)); + testData.id2LearnStatus.put(i,futures.get(i).get(30, TimeUnit.SECONDS)); } catch (InterruptedException e) { @@ -868,7 +869,7 @@ * @throws SAXException * @throws TransformerException */ - private void generateUpdatedXML(File originalFile, File updatedFile,SparqlEndpoint endpoint, ExtractionDBCache cache,Model model) throws ParserConfigurationException, SAXException, IOException, TransformerException + protected void generateUpdatedXML(File originalFile, File updatedFile,SparqlEndpoint endpoint, ExtractionDBCache cache,Model model) throws ParserConfigurationException, SAXException, IOException, TransformerException { logger.info(String.format("Updating question file \"%s\" by removing questions without nonempty resource list answer and adding answers.\n" + " Saving the result to file \"%s\"",originalFile.getPath(),updatedFile.getPath())); @@ -951,31 +952,30 @@ // } } - int correctMatches = 0; int numberOfNoTemplateFoundExceptions = 0; int numberOfOtherExceptions = 0; // int successfullTestThreadRuns = 0; /** */ - // private static final String DBPEDIA_LIVE_ENDPOINT_URL_STRING = "http://live.dbpedia.org/sparql"; + // protected static final String DBPEDIA_LIVE_ENDPOINT_URL_STRING = "http://live.dbpedia.org/sparql"; private static final Logger logger = Logger.getLogger(SPARQLTemplateBasedLearner3Test.class); - // private SPARQLTemplateBasedLearner2 oxfordLearner; - // private SPARQLTemplateBasedLearner2 
dbpediaLiveLearner; + // protected SPARQLTemplateBasedLearner2 oxfordLearner; + // protected SPARQLTemplateBasedLearner2 dbpediaLiveLearner; - // private final ExtractionDBCache oxfordCache = new ExtractionDBCache("cache"); - private final static ExtractionDBCache dbpediaLiveCache = new ExtractionDBCache("cache"); + // protected final ExtractionDBCache oxfordCache = new ExtractionDBCache("cache"); + protected final static ExtractionDBCache dbpediaLiveCache = new ExtractionDBCache("cache"); - private final Knowledgebase dbpediaLiveKnowledgebase = createDBpediaLiveKnowledgebase(dbpediaLiveCache); + protected final Knowledgebase dbpediaLiveKnowledgebase = createDBpediaLiveKnowledgebase(dbpediaLiveCache); static final SparqlEndpoint dbpediaLiveEndpoint = SparqlEndpoint.getEndpointDBpediaLiveAKSW(); //static SparqlEndpoint oxfordEndpoint; - // private ResultSet executeDBpediaLiveSelect(String query){return SparqlQuery.convertJSONtoResultSet(dbpediaLiveCache.executeSelectQuery(dbpediaLiveEndpoint, query));} + // protected ResultSet executeDBpediaLiveSelect(String query){return SparqlQuery.convertJSONtoResultSet(dbpediaLiveCache.executeSelectQuery(dbpediaLiveEndpoint, query));} - private static boolean httpResponseOK(String url) throws MalformedURLException, IOException + protected static boolean httpResponseOK(String url) throws MalformedURLException, IOException { HttpURLConnection connection = (HttpURLConnection) new URL(url).openConnection(); connection.setRequestMethod("HEAD"); @@ -1041,7 +1041,7 @@ testIndex(dbpedia.getPropertyIndex(),new String[][] {{"born in","http://dbpedia.org/ontology/birthPlace"}}); } - private static Knowledgebase createDBpediaLiveKnowledgebase(ExtractionDBCache cache) + protected static Knowledgebase createDBpediaLiveKnowledgebase(ExtractionDBCache cache) { // for(String url : new String[] {resourcesURL,classesURL,propertiesURL,boaPropertiesURL}) @@ -1138,7 +1138,7 @@ return uris; } - private static String urlDecode(String url){ + protected 
static String urlDecode(String url){ String decodedURL = null; try { decodedURL = URLDecoder.decode(url, "UTF-8"); @@ -1150,7 +1150,7 @@ } - // private ResultSet executeOxfordSelect(String query){return SparqlQuery.convertJSONtoResultSet(oxfordCache.executeSelectQuery(oxfordEndpoint, query));} + // protected ResultSet executeOxfordSelect(String query){return SparqlQuery.convertJSONtoResultSet(oxfordCache.executeSelectQuery(oxfordEndpoint, query));} // @Test public void benchmarkCreateOxfordKnowledgeBase() // { @@ -1164,7 +1164,7 @@ // System.out.println(diff+" millis as a whole, "+diff/1000.0+" millis per run"); // } - // private Knowledgebase createOxfordKnowledgebase(ExtractionDBCache cache) + // protected Knowledgebase createOxfordKnowledgebase(ExtractionDBCache cache) // { // URL url; // try{url = new URL("http://lgd.aksw.org:8900/sparql");} catch(Exception e) {throw new RuntimeException(e);} @@ -1186,19 +1186,19 @@ // } /** @author konrad * Learns a query for a question and puts it into the given testData object. 
* */ - private static class LearnQueryCallable implements Callable<LearnStatus> + protected static class LearnQueryCallable implements Callable<LearnStatus> { - private final String question; - // private final String endpoint; - private final int id; - private final QueryTestData testData; + protected final String question; + // protected final String endpoint; + protected final int id; + protected final QueryTestData testData; - static private class POSTaggerHolder + static protected class POSTaggerHolder {static public final PartOfSpeechTagger posTagger = new SynchronizedStanfordPartOfSpeechTagger();} - static private final WordNet wordnet = new WordNet(); - static private final Options options = new Options(); - private final SPARQLTemplateBasedLearner2 learner; + static protected final WordNet wordnet = new WordNet(); + static protected final Options options = new Options(); + protected final SPARQLTemplateBasedLearner2 learner; public LearnQueryCallable(String question, int id, QueryTestData testData, Knowledgebase knowledgeBase,boolean pretagged) { @@ -1294,9 +1294,9 @@ return sb.toString(); } - private static String escapePre(String s) {return s.replace("<", "<").replace(">", ">");} + protected static String escapePre(String s) {return s.replace("<", "<").replace(">", ">");} - private static String getAnswerHTMLList(String[] answers) + protected static String getAnswerHTMLList(String[] answers) { StringBuilder sbAnswers = new StringBuilder(); final int MAX = 10; @@ -1327,7 +1327,7 @@ /** Generates the HTML string content for one of the 4 colored bars which represent the correctly, incorrectly and unanswered question. 
* Also creates and links to a file which contains the questions.*/ - private static String createColoredColumn(/*@NonNull*/ File link,/*@NonNull*/ String title,/*@NonNull*/ String color,/*@NonNull*/ Collection<String> questions, int numberOfQuestionsTotal, boolean queriesAvailable,boolean jaccard, Evaluation evaluation) + protected static String createColoredColumn(/*@NonNull*/ File link,/*@NonNull*/ String title,/*@NonNull*/ String color,/*@NonNull*/ Collection<String> questions, int numberOfQuestionsTotal, boolean queriesAvailable,boolean jaccard, Evaluation evaluation) { final StringBuilder sb = new StringBuilder(); sb.append("<a href='"+link.getAbsolutePath()+"' title='"+title+" ("+questions.size()+"/"+(numberOfQuestionsTotal==0?"":numberOfQuestionsTotal)+")'>"); @@ -1465,13 +1465,13 @@ } catch (Exception e){throw new RuntimeException(e);} } - // private void updateFile(File originalFile, File updatedFile, String endpoint) + // protected void updateFile(File originalFile, File updatedFile, String endpoint) // { // // // } - // private void test(File file) throws MalformedURLException, InterruptedException + // protected void test(File file) throws MalformedURLException, InterruptedException // { // SortedMap<Integer, String> id2Question = new TreeMap<Integer, String>(); // SortedMap<Integer, String> id2Query = new TreeMap<Integer, String>(); @@ -1547,5 +1547,5 @@ // //fail("Not yet implemented"); // } - private static ResultSet executeSelect(SparqlEndpoint endpoint, String query, ExtractionDBCache cache){return SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query));} + protected static ResultSet executeSelect(SparqlEndpoint endpoint, String query, ExtractionDBCache cache){return SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query));} } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ki...@us...> - 2012-11-09 15:06:38
|
Revision: 3871 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3871&view=rev Author: kirdie Date: 2012-11-09 15:06:31 +0000 (Fri, 09 Nov 2012) Log Message: ----------- Modified Paths: -------------- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Slot.java branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java Modified: branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-11-09 11:46:34 UTC (rev 3870) +++ branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-11-09 15:06:31 UTC (rev 3871) @@ -96,7 +96,7 @@ public class SPARQLTemplateBasedLearner2 implements SparqlQueryLearningAlgorithm { /** synonyms are great but are not used yet by the HMM algorithm. **/ - private static final boolean HMM_USE_SYNONYMS = true; + private static final boolean HMM_USE_SYNONYMS = false; /** The minimum score of items that are accepted from the Sindice search BOA index. 
**/ private static final Double BOA_THRESHOLD = 0.5; enum Mode {BEST_QUERY, BEST_NON_EMPTY_QUERY} Modified: branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Slot.java =================================================================== --- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Slot.java 2012-11-09 11:46:34 UTC (rev 3870) +++ branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Slot.java 2012-11-09 15:06:31 UTC (rev 3871) @@ -196,5 +196,4 @@ // } } - } Modified: branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java =================================================================== --- branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-11-09 11:46:34 UTC (rev 3870) +++ branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-11-09 15:06:31 UTC (rev 3871) @@ -115,17 +115,17 @@ // problem mit "In/IN which/WDT films/NNS did/VBD Julia/NNP Roberts/NNP as/RB well/RB as/IN Richard/NNP Gere/NNP play/NN" public class SPARQLTemplateBasedLearner3Test { - protected static final boolean USE_HMM = false; + protected static final boolean USE_HMM = true; protected static final File evaluationFolder = new File("cache/evaluation"); protected static final boolean DBPEDIA_PRETAGGED = true; protected static final boolean OXFORD_PRETAGGED = false; - protected static final int MAX_NUMBER_OF_QUESTIONS = Integer.MAX_VALUE; + protected static final int MAX_NUMBER_OF_QUESTIONS = 100; protected static final boolean WHITELIST_ONLY = false; protected static final Set<Integer> WHITELIST = Collections.unmodifiableSet(new HashSet<Integer>(Arrays.asList(new Integer[] {4}))); protected static final boolean GENERATE_HTML_ONLY = false; protected static final int MAX_THREADS = 10; - @Test public void testDBpedia() 
throws Exception + /*@Test*/ public void testDBpedia() throws Exception { File file = generateTestDataIfNecessary( new File(getClass().getClassLoader().getResource("tbsl/evaluation/qald2-dbpedia-train-tagged(ideal).xml").getFile()), @@ -244,10 +244,6 @@ // "<td><ul>"+getAnswerHTMLList(evaluation.referenceData.id2Answers.get(id).toArray(new String[0]))+"</ul></td>"+ // "<td>"+evaluation.testData.id2LearnStatus.get(id)+"</td></tr>"); - - - - logger.info(questionsOnlyCorrectWithHMM.size()+" questions only correct with hmm, "+ questionsOnlyCorrectWithoutHMM.size()+" questions only correct without hmm"); @@ -256,7 +252,7 @@ out.close(); } - /*@Test*/ public void testOxford() throws Exception + @Test public void testOxford() throws Exception { File file = new File(getClass().getClassLoader().getResource("tbsl/evaluation/oxford_working_questions.xml").getFile()); test("Oxford 19 working questions", file,null,null,null,loadOxfordModel(),getOxfordMappingIndex(),OXFORD_PRETAGGED); @@ -362,39 +358,37 @@ public static Model loadOxfordModel() { // load it into a model because we can and it's faster and doesn't rely on endpoint availability - // the files are located in the paper svn under question-answering-iswc-2012/data + // the files are located in the paper svn under http://diadem.cs.ox.ac.uk/svn/papers/oxpath/question-answering-iswc-2012/data/ and data_v2 // ls *ttl | xargs -I @ echo \"@\", + //find -type f | sed -r "s|\./(.*)|\"\1\",|" final String[] rdf = { - "abbeys-sales-triple.ttl", - "andrewsonline-sales-triple.ttl", - "anker-sales-triple.ttl", - "bairstoweves-sales-triple.ttl", - "ballards-sales-triple.ttl", - "breckon-sales-triple.ttl", - "buckellandballard-sales-triple.ttl", - "carterjonas-sales.ttl", - "churchgribben-salse-triple.ttl", - "findaproperty-sales-triple.ttl", - "johnwood-sales-triple.ttl", - "martinco-letting-triples.ttl", - "scottfraser-letting-triples.ttl", - "scottfraser-sales-triples.ttl", - "scottsymonds-sales-triple.ttl", - 
"scrivenerandreinger-sales-triple.ttl", - "sequencehome-sales-triple.ttl", - "teampro-sales.ttl", - "thomasmerrifield-sales-triples.ttl", - "wwagency-letting-triple_with-XSD.ttl", - "wwagency-sales-triple_with-XSD.ttl", - // ls links/*ttl | xargs -I @ echo \"@\", - "links/allNear.ttl", - "links/all_walking_distance.ttl", - "links/lgd_data.ttl", - // ls schema/* | xargs -I @ echo \"@\", - "schema/goodRelations.owl", - "schema/LGD-Dump-110406-Ontology.nt", - "schema/ontology.ttl", - "schema/vCard.owl" + "andrewsonline-sales-triple.ttl", + "johnwood-sales-triple.ttl", + "wwagency.ttl", + "findaproperty-sales-triple.ttl", + "carterjonas-sales.ttl", + "breckon.ttl", + "sequencehome.ttl", + "schema/ontology.ttl", + "schema/goodRelations.owl", + "schema/vCard.owl", + "schema/LGD-Dump-110406-Ontology.nt", + "hodson.ttl", + "thomasmerrifield.ttl", + "churchgribben.ttl", + "bairstoweves-sales-triple.ttl", + "anker.ttl", + "martinco-letting-triples.ttl", + "scrivenerandreinger.ttl", + "ballards.ttl", + "teamprop.ttl", + "abbeys-sales-triple.ttl", + "links/ontology.ttl", + "links/lgd_data.ttl", + "links/allNear.ttl", + "links/all_walking_distance.ttl", + "scottsymonds.ttl", + "scottfraser.ttl" }; Model m = ModelFactory.createDefaultModel(); for(final String s:rdf) @@ -810,7 +804,7 @@ String question = id2Question.get(i); try { - testData.id2LearnStatus.put(i,futures.get(i).get(30, TimeUnit.SECONDS)); + testData.id2LearnStatus.put(i,futures.get(i).get(USE_HMM?120:30, TimeUnit.SECONDS)); } catch (InterruptedException e) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ki...@us...> - 2012-11-19 12:06:06
|
Revision: 3876 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3876&view=rev Author: kirdie Date: 2012-11-19 12:05:59 +0000 (Mon, 19 Nov 2012) Log Message: ----------- SPARQL indices now ignore case. SPARQLClassesIndex now counts as classes all resources that are instances of owl:Class or have at least one instance (some classes are not modelled as owl:Class). Added a test class for the SPARQLClassesIndex. SPARQLTemplateBasedLearner2 now uses the SPARQLClassesIndex for Classes instead of the normal SPARQL index. The benchmark results should now be better because this applies to many of the 'near' or 'close to' questions. Modified Paths: -------------- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java branches/hmm/components-ext/src/main/java/org/dllearner/common/index/SPARQLClassesIndex.java branches/hmm/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java Added Paths: ----------- branches/hmm/components-ext/src/test/java/org/dllearner/common/ branches/hmm/components-ext/src/test/java/org/dllearner/common/index/ branches/hmm/components-ext/src/test/java/org/dllearner/common/index/SPARQLClassesIndexTest.java Modified: branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-11-19 11:15:47 UTC (rev 3875) +++ branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-11-19 12:05:59 UTC (rev 3876) @@ -6,14 +6,12 @@ import java.util.Arrays; import java.util.Collection; import java.util.Collections; -import java.util.Comparator; import java.util.HashMap; import 
java.util.HashSet; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; -import java.util.Map.Entry; import java.util.Set; import java.util.SortedMap; import java.util.SortedSet; @@ -55,6 +53,7 @@ import org.dllearner.common.index.IndexResultItem; import org.dllearner.common.index.IndexResultSet; import org.dllearner.common.index.MappingBasedIndex; +import org.dllearner.common.index.SPARQLClassesIndex; import org.dllearner.common.index.SPARQLDatatypePropertiesIndex; import org.dllearner.common.index.SPARQLIndex; import org.dllearner.common.index.SPARQLObjectPropertiesIndex; @@ -65,10 +64,6 @@ import org.dllearner.core.ComponentInitException; import org.dllearner.core.LearningProblem; import org.dllearner.core.SparqlQueryLearningAlgorithm; -import org.dllearner.core.owl.Description; -import org.dllearner.core.owl.NamedClass; -import org.dllearner.core.owl.ObjectProperty; -import org.dllearner.core.owl.Thing; import org.dllearner.kb.LocalModelBasedSparqlEndpointKS; import org.dllearner.kb.SparqlEndpointKS; import org.dllearner.kb.sparql.ExtractionDBCache; @@ -76,7 +71,6 @@ import org.dllearner.kb.sparql.SparqlQuery; import org.dllearner.reasoning.SPARQLReasoner; import org.ini4j.Options; -import org.openjena.atlas.logging.Log; import org.semanticweb.owlapi.model.IRI; import org.semanticweb.owlapi.util.SimpleIRIShortFormProvider; import com.hp.hpl.jena.ontology.OntModelSpec; @@ -255,7 +249,7 @@ public SPARQLTemplateBasedLearner2(Model model, MappingBasedIndex mappingBasedIndex, PartOfSpeechTagger posTagger) { - this(model, new SPARQLIndex(model),new SPARQLIndex(model),new SPARQLIndex(model),posTagger); + this(model, new SPARQLIndex(model),new SPARQLClassesIndex(model),new SPARQLIndex(model),posTagger); setMappingIndex(mappingBasedIndex); } @@ -674,7 +668,9 @@ // get candidates for slot if(!slot2Allocations.containsKey(slot)) { - slot2Allocations.put(slot,new SlotProcessor(slot).computeAllocations(slot)); + 
SortedSet<Allocation> allocations = new SlotProcessor(slot).computeAllocations(slot); + logger.info("allocations for slot "+slot+": "+allocations); + slot2Allocations.put(slot,allocations); } } } @@ -717,6 +713,7 @@ { try { SortedSet<Allocation> result = future.get(); + logger.debug("allocations: "+result); slot2Allocations.put(futureToSlot.get(future), result); } catch (InterruptedException e) {e.printStackTrace();} catch (ExecutionException e) {e.printStackTrace();throw new RuntimeException(e);} } Modified: branches/hmm/components-ext/src/main/java/org/dllearner/common/index/SPARQLClassesIndex.java =================================================================== --- branches/hmm/components-ext/src/main/java/org/dllearner/common/index/SPARQLClassesIndex.java 2012-11-19 11:15:47 UTC (rev 3875) +++ branches/hmm/components-ext/src/main/java/org/dllearner/common/index/SPARQLClassesIndex.java 2012-11-19 12:05:59 UTC (rev 3876) @@ -2,8 +2,8 @@ import org.dllearner.kb.sparql.ExtractionDBCache; import org.dllearner.kb.sparql.SparqlEndpoint; - import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.vocabulary.OWL; public class SPARQLClassesIndex extends SPARQLIndex{ @@ -26,13 +26,13 @@ super.queryTemplate = "SELECT DISTINCT ?uri WHERE {\n" + "?s a ?uri.\n" + "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label\n" + - "FILTER(REGEX(STR(?label), '%s'))}\n" + + "FILTER(REGEX(STR(?label), '%s', 'i'))}\n" + "LIMIT %d OFFSET %d"; - super.queryWithLabelTemplate = "SELECT DISTINCT * WHERE {\n" + - "?s a ?uri.\n" + + super.queryWithLabelTemplate = "SELECT DISTINCT ?uri ?label WHERE {\n" + + "{?uri a <"+ OWL.Class.getURI() + ">.} UNION {?s a ?uri.}\n"+ "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label\n" + - "FILTER(REGEX(STR(?label), '%s'))}\n" + + "FILTER(REGEX(STR(?label), '%s', 'i'))}\n" + "LIMIT %d OFFSET %d"; } } Modified: branches/hmm/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java 
=================================================================== --- branches/hmm/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java 2012-11-19 11:15:47 UTC (rev 3875) +++ branches/hmm/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java 2012-11-19 12:05:59 UTC (rev 3876) @@ -30,13 +30,13 @@ protected String queryTemplate = "SELECT DISTINCT ?uri WHERE {\n" + "?uri a ?type.\n" + "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label\n" + - "FILTER(REGEX(STR(?label), '%s'))}\n" + + "FILTER(REGEX(STR(?label), '%s', 'i'))}\n" + "LIMIT %d OFFSET %d"; protected String queryWithLabelTemplate = "SELECT DISTINCT * WHERE {\n" + "?uri a ?type.\n" + "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label\n" + - "FILTER(REGEX(STR(?label), '%s'))}\n" + + "FILTER(REGEX(STR(?label), '%s', 'i'))}\n" + "LIMIT %d OFFSET %d"; @@ -108,8 +108,7 @@ public IndexResultSet getResourcesWithScores(String searchTerm, int limit, int offset) { IndexResultSet irs = new IndexResultSet(); - String query = String.format(queryWithLabelTemplate, searchTerm, limit, offset); - + String query = String.format(queryWithLabelTemplate, searchTerm, limit, offset); ResultSet rs = executeSelect(query); QuerySolution qs; Modified: branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java =================================================================== --- branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-11-19 11:15:47 UTC (rev 3875) +++ branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-11-19 12:05:59 UTC (rev 3876) @@ -132,8 +132,8 @@ protected static final int QUESTION_OFFSET = 0; protected static final int QUESTION_LIMIT = Integer.MAX_VALUE; - protected static final boolean WHITELIST_ONLY = false; - protected static final Set<Integer> WHITELIST = 
Collections.unmodifiableSet(new HashSet<Integer>(Arrays.asList(new Integer[] {4}))); + protected static final boolean WHITELIST_ONLY = true; + protected static final Set<Integer> WHITELIST = Collections.unmodifiableSet(new HashSet<Integer>(Arrays.asList(new Integer[] {24}))); protected static final boolean GENERATE_HTML_ONLY = false; protected static final int MAX_THREADS = 4; @@ -364,13 +364,15 @@ /**more will be left out of the xml file */ List<String> questions = new LinkedList<String>(); BufferedReader in = new BufferedReader((new InputStreamReader(getClass().getClassLoader().getResourceAsStream("tbsl/oxford_eval_queries_parsed.txt")))); - int count=0; + int count=-1; for(String line;count<(QUESTION_LIMIT+QUESTION_OFFSET)&&(line=in.readLine())!=null;) { + count++; + if(WHITELIST_ONLY&&!WHITELIST.contains(Integer.valueOf(count))) {continue;} logger.info(count+": "+line); - if(count<QUESTION_OFFSET) {count++;continue;} + if(count<QUESTION_OFFSET) {continue;} String question = line.replace("question: ", "").trim(); - if(!line.trim().isEmpty()) {questions.add(question);count++;} + if(!line.trim().isEmpty()) {questions.add(question);} } in.close(); Model model = loadOxfordModel(); Added: branches/hmm/components-ext/src/test/java/org/dllearner/common/index/SPARQLClassesIndexTest.java =================================================================== --- branches/hmm/components-ext/src/test/java/org/dllearner/common/index/SPARQLClassesIndexTest.java (rev 0) +++ branches/hmm/components-ext/src/test/java/org/dllearner/common/index/SPARQLClassesIndexTest.java 2012-11-19 12:05:59 UTC (rev 3876) @@ -0,0 +1,24 @@ +/** **/ +package org.dllearner.common.index; + +import static org.junit.Assert.*; +import org.dllearner.algorithm.tbsl.learning.SPARQLTemplateBasedLearner3; +import org.dllearner.algorithm.tbsl.learning.SPARQLTemplateBasedLearner3Test; +import org.junit.Test; +import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.rdf.model.ModelFactory; + +/** @author 
konrad + * */ +public class SPARQLClassesIndexTest +{ + + @Test public void test() + { + Model m = ModelFactory.createDefaultModel(); + m.read(SPARQLTemplateBasedLearner3Test.class.getClassLoader().getResourceAsStream("oxford/schema/LGD-Dump-110406-Ontology.nt"),null, "TURTLE"); + SPARQLClassesIndex index = new SPARQLClassesIndex(m); + assertFalse(index.getResourcesWithScores("pharmacy").getItems().isEmpty()); + } + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |