From: <ki...@us...> - 2012-08-23 12:50:07
|
Revision: 3831 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3831&view=rev Author: kirdie Date: 2012-08-23 12:49:55 +0000 (Thu, 23 Aug 2012) Log Message: ----------- more oxford test code. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/core/AbstractAxiomLearningAlgorithm.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java Modified: trunk/components-core/src/main/java/org/dllearner/core/AbstractAxiomLearningAlgorithm.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/core/AbstractAxiomLearningAlgorithm.java 2012-08-23 08:40:43 UTC (rev 3830) +++ trunk/components-core/src/main/java/org/dllearner/core/AbstractAxiomLearningAlgorithm.java 2012-08-23 12:49:55 UTC (rev 3831) @@ -460,7 +460,7 @@ T values = map.get(key); if(values == null){ try { - values = (T) values.getClass().newInstance(); + values = (T) value.getClass().newInstance(); } catch (InstantiationException e) { e.printStackTrace(); } catch (IllegalAccessException e) { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-08-23 08:40:43 UTC (rev 3830) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-08-23 12:49:55 UTC (rev 3831) @@ -93,119 +93,119 @@ import com.jamonapi.MonitorFactory; public class SPARQLTemplateBasedLearner2 implements SparqlQueryLearningAlgorithm{ - + enum Mode{ BEST_QUERY, BEST_NON_EMPTY_QUERY } - + private Mode mode = Mode.BEST_QUERY; - + private static final Logger logger = Logger.getLogger(SPARQLTemplateBasedLearner2.class); private Monitor templateMon = MonitorFactory.getTimeMonitor("template"); private Monitor sparqlMon = MonitorFactory.getTimeMonitor("sparql"); - + private boolean useRemoteEndpointValidation; private boolean stopIfQueryResultNotEmpty; private int maxTestedQueriesPerTemplate = 50; private int maxQueryExecutionTimeInSeconds; private int maxTestedQueries = 200; private int maxIndexResults; - - private SparqlEndpoint endpoint; - private Model model; - + + private SparqlEndpoint endpoint = null; + private Model model = null; + private ExtractionDBCache cache = new ExtractionDBCache("cache"); - + private Index resourcesIndex; private Index classesIndex; private Index propertiesIndex; - + private Index datatypePropertiesIndex; private Index objectPropertiesIndex; - + private MappingBasedIndex mappingIndex; - - private Templator templateGenerator; + + private Templator templateGenerator = null; private Lemmatizer lemmatizer; private PartOfSpeechTagger posTagger; private WordNet wordNet; - + private String question; private int learnedPos = -1; - + private Set<Template> templates; private Map<Template, Collection<? extends Query>> template2Queries; private Map<Slot, List<String>> slot2URI; - + private Collection<WeightedQuery> sparqlQueryCandidates; private SortedSet<WeightedQuery> learnedSPARQLQueries; private SortedSet<WeightedQuery> generatedQueries; - + private SPARQLReasoner reasoner; - + private String currentlyExecutedQuery; - + private boolean dropZeroScoredQueries = true; private boolean useManualMappingsIfExistOnly = true; - + private boolean multiThreaded = true; - + private String [] grammarFiles = new String[]{"tbsl/lexicon/english.lex"}; - + private PopularityMap popularityMap; - + private Set<String> relevantKeywords; - + private boolean useDomainRangeRestriction = true; - + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex){ this(endpoint, resourcesIndex, classesIndex, propertiesIndex, new StanfordPartOfSpeechTagger()); } - + public SPARQLTemplateBasedLearner2(Knowledgebase knowledgebase, PartOfSpeechTagger posTagger, WordNet wordNet, Options options){ this(knowledgebase.getEndpoint(), knowledgebase.getResourceIndex(), knowledgebase.getClassIndex(),knowledgebase.getPropertyIndex(), posTagger, wordNet, options); } - + public SPARQLTemplateBasedLearner2(Knowledgebase knowledgebase){ this(knowledgebase.getEndpoint(), knowledgebase.getResourceIndex(), knowledgebase.getClassIndex(),knowledgebase.getPropertyIndex(), new StanfordPartOfSpeechTagger(), new WordNet(), new Options()); } - + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index index){ this(endpoint, index, new StanfordPartOfSpeechTagger()); } - + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger){ this(endpoint, resourcesIndex, classesIndex, propertiesIndex, posTagger, new WordNet(), new Options()); } - + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index index, PartOfSpeechTagger posTagger){ this(endpoint, index, posTagger, new WordNet(), new Options()); } - + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, WordNet wordNet){ this(endpoint, resourcesIndex, classesIndex, propertiesIndex, new StanfordPartOfSpeechTagger(), wordNet, new Options()); } - + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index index, WordNet wordNet){ this(endpoint, index, new StanfordPartOfSpeechTagger(), wordNet, new Options()); } - + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet){ this(endpoint, resourcesIndex, classesIndex, propertiesIndex, posTagger, wordNet, new Options(), new ExtractionDBCache("cache")); } - + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index index, PartOfSpeechTagger posTagger, WordNet wordNet){ this(endpoint, index, index, index, posTagger, wordNet, new Options(), new ExtractionDBCache("cache")); } - + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet, Options options){ this(endpoint, resourcesIndex, classesIndex, propertiesIndex, posTagger, wordNet, options, new ExtractionDBCache("cache")); } - + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index index, PartOfSpeechTagger posTagger, WordNet wordNet, Options options){ this(endpoint, index, index, index, posTagger, wordNet, options, new ExtractionDBCache("cache")); } - + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet, Options options, ExtractionDBCache cache){ this.endpoint = endpoint; this.resourcesIndex = resourcesIndex; @@ -214,9 +214,9 @@ this.posTagger = posTagger; this.wordNet = wordNet; this.cache = cache; - + setOptions(options); - + if(propertiesIndex instanceof SPARQLPropertiesIndex){ if(propertiesIndex instanceof VirtuosoPropertiesIndex){ datatypePropertiesIndex = new VirtuosoDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); @@ -231,29 +231,29 @@ } reasoner = new SPARQLReasoner(new SparqlEndpointKS(endpoint), cache); } - + public SPARQLTemplateBasedLearner2(Model model, Index resourcesIndex, Index classesIndex, Index propertiesIndex){ this(model, resourcesIndex, classesIndex, propertiesIndex, new StanfordPartOfSpeechTagger()); } - + public SPARQLTemplateBasedLearner2(Model model, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger){ this(model, resourcesIndex, classesIndex, propertiesIndex, posTagger, new WordNet(), new Options()); } - + public SPARQLTemplateBasedLearner2(Model model, Index resourcesIndex, Index classesIndex, Index propertiesIndex, WordNet wordNet){ this(model, resourcesIndex, classesIndex, propertiesIndex, new StanfordPartOfSpeechTagger(), wordNet, new Options()); } - + public SPARQLTemplateBasedLearner2(Model model, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet, Options options){ this(model, resourcesIndex, classesIndex, propertiesIndex, posTagger, wordNet, options, new ExtractionDBCache("cache")); } - + public SPARQLTemplateBasedLearner2(Model model, MappingBasedIndex mappingBasedIndex, PartOfSpeechTagger posTagger) { this(model, new SPARQLIndex(model),new SPARQLIndex(model),new SPARQLIndex(model),posTagger); setMappingIndex(mappingBasedIndex); } - + public SPARQLTemplateBasedLearner2(Model model, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet, Options options, ExtractionDBCache cache){ this.model = model; this.resourcesIndex = resourcesIndex; @@ -262,9 +262,9 @@ this.posTagger = posTagger; this.wordNet = wordNet; this.cache = cache; - + setOptions(options); - + if(propertiesIndex instanceof SPARQLPropertiesIndex){ if(propertiesIndex instanceof VirtuosoPropertiesIndex){ datatypePropertiesIndex = new VirtuosoDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); @@ -279,25 +279,27 @@ } reasoner = new SPARQLReasoner(new LocalModelBasedSparqlEndpointKS(ModelFactory.createOntologyModel(OntModelSpec.RDFS_MEM, model)), cache); } - - public void setGrammarFiles(String[] grammarFiles){ + + public void setGrammarFiles(String[] grammarFiles) + { + if(templateGenerator==null) {throw new AssertionError("Learner not initialized. Please call init();");} templateGenerator.setGrammarFiles(grammarFiles); } - + @Override public void init() throws ComponentInitException { - templateGenerator = new Templator(posTagger, wordNet, grammarFiles); - lemmatizer = new LingPipeLemmatizer(); + templateGenerator = new Templator(posTagger, wordNet, grammarFiles); + lemmatizer = new LingPipeLemmatizer(); } - + public void setMappingIndex(MappingBasedIndex mappingIndex) { this.mappingIndex = mappingIndex; } - + public void setCache(ExtractionDBCache cache) { this.cache = cache; } - + public void setKnowledgebase(Knowledgebase knowledgebase){ this.endpoint = knowledgebase.getEndpoint(); this.resourcesIndex = knowledgebase.getResourceIndex(); @@ -318,28 +320,28 @@ } reasoner = new SPARQLReasoner(new SparqlEndpointKS(endpoint)); } - + public void setUseDomainRangeRestriction(boolean useDomainRangeRestriction) { this.useDomainRangeRestriction = useDomainRangeRestriction; } - + /* * Only for Evaluation useful. */ public void setUseIdealTagger(boolean value){ templateGenerator.setUNTAGGED_INPUT(!value); } - + private void setOptions(Options options){ maxIndexResults = Integer.parseInt(options.get("solr.query.limit", "10")); - + maxQueryExecutionTimeInSeconds = Integer.parseInt(options.get("sparql.query.maxExecutionTimeInSeconds", "20")); cache.setMaxExecutionTimeInSeconds(maxQueryExecutionTimeInSeconds); - + useRemoteEndpointValidation = options.get("learning.validationType", "remote").equals("remote") ? true : false; stopIfQueryResultNotEmpty = Boolean.parseBoolean(options.get("learning.stopAfterFirstNonEmptyQueryResult", "true")); maxTestedQueriesPerTemplate = Integer.parseInt(options.get("learning.maxTestedQueriesPerTemplate", "20")); - + String wordnetPath = options.get("wordnet.dictionary", "tbsl/dict"); wordnetPath = this.getClass().getClassLoader().getResource(wordnetPath).getPath(); System.setProperty("wordnet.database.dir", wordnetPath); @@ -347,20 +349,20 @@ public void setEndpoint(SparqlEndpoint endpoint){ this.endpoint = endpoint; - + reasoner = new SPARQLReasoner(new SparqlEndpointKS(endpoint)); reasoner.setCache(cache); reasoner.prepareSubsumptionHierarchy(); } - + public void setQuestion(String question){ this.question = question; } - + public void setUseRemoteEndpointValidation(boolean useRemoteEndpointValidation){ this.useRemoteEndpointValidation = useRemoteEndpointValidation; } - + public int getMaxQueryExecutionTimeInSeconds() { return maxQueryExecutionTimeInSeconds; } @@ -383,11 +385,11 @@ slot2URI = new HashMap<Slot, List<String>>(); relevantKeywords = new HashSet<String>(); currentlyExecutedQuery = null; - -// templateMon.reset(); -// sparqlMon.reset(); + + // templateMon.reset(); + // sparqlMon.reset(); } - + public void learnSPARQLQueries() throws NoTemplateFoundException{ reset(); //generate SPARQL query templates @@ -403,13 +405,13 @@ relevantKeywords.addAll(templateGenerator.getUnknownWords()); if(templates.isEmpty()){ throw new NoTemplateFoundException(); - + } logger.debug("Templates:"); for(Template t : templates){ logger.debug(t); } - + //get the weighted query candidates generatedQueries = getWeightedSPARQLQueries(templates); sparqlQueryCandidates = new ArrayList<WeightedQuery>(); @@ -422,7 +424,7 @@ } i++; } - + if(mode == Mode.BEST_QUERY){ double bestScore = -1; for(WeightedQuery candidate : generatedQueries){ @@ -439,15 +441,15 @@ if(useRemoteEndpointValidation){ //on remote endpoint validateAgainstRemoteEndpoint(sparqlQueryCandidates); } else {//on local model - + } } } - + public SortedSet<WeightedQuery> getGeneratedQueries() { return generatedQueries; } - + public SortedSet<WeightedQuery> getGeneratedQueries(int topN) { SortedSet<WeightedQuery> topNQueries = new TreeSet<WeightedQuery>(); int max = Math.min(topN, generatedQueries.size()); @@ -459,28 +461,28 @@ } return topNQueries; } - + public Set<Template> getTemplates(){ return templates; } - + public List<String> getGeneratedSPARQLQueries(){ List<String> queries = new ArrayList<String>(); for(WeightedQuery wQ : sparqlQueryCandidates){ queries.add(wQ.getQuery().toString()); } - + return queries; } - + public Map<Template, Collection<? extends Query>> getTemplates2SPARQLQueries(){ return template2Queries; } - + public Map<Slot, List<String>> getSlot2URIs(){ return slot2URI; } - + private void normProminenceValues(Set<Allocation> allocations){ double min = 0; double max = 0; @@ -497,25 +499,25 @@ a.setProminence(prominence); } } - + private void computeScore(Set<Allocation> allocations){ double alpha = 0.8; double beta = 1 - alpha; - + for(Allocation a : allocations){ double score = alpha * a.getSimilarity() + beta * a.getProminence(); a.setScore(score); } - + } - + public Set<String> getRelevantKeywords(){ return relevantKeywords; } - + private SortedSet<WeightedQuery> getWeightedSPARQLQueries(Set<Template> templates){ logger.debug("Generating SPARQL query candidates..."); - + Map<Slot, Set<Allocation>> slot2Allocations = new TreeMap<Slot, Set<Allocation>>(new Comparator<Slot>() { @Override @@ -528,23 +530,22 @@ } }); slot2Allocations = Collections.synchronizedMap(new HashMap<Slot, Set<Allocation>>()); - - + + SortedSet<WeightedQuery> allQueries = new TreeSet<WeightedQuery>(); - + Set<Allocation> allocations; - + for(Template t : templates){ - logger.info("Processing template:\n" + t.toString()); - System.err.println(QueryFactory.create(t.getQuery().toString(), Syntax.syntaxSPARQL_11)); + logger.info("Processing template:\n" + t.toString()); allocations = new TreeSet<Allocation>(); boolean containsRegex = t.getQuery().toString().toLowerCase().contains("(regex("); - + ExecutorService executor = Executors.newFixedThreadPool(t.getSlots().size()); List<Future<Map<Slot, SortedSet<Allocation>>>> list = new ArrayList<Future<Map<Slot, SortedSet<Allocation>>>>(); - + long startTime = System.currentTimeMillis(); - + for (Slot slot : t.getSlots()) { if(!slot2Allocations.containsKey(slot)){//System.out.println(slot + ": " + slot.hashCode());System.out.println(slot2Allocations); Callable<Map<Slot, SortedSet<Allocation>>> worker = new SlotProcessor(slot); @@ -552,7 +553,7 @@ list.add(submit); } } - + for (Future<Map<Slot, SortedSet<Allocation>>> future : list) { try { Map<Slot, SortedSet<Allocation>> result = future.get(); @@ -564,10 +565,10 @@ e.printStackTrace(); } } - + executor.shutdown(); - - + + /*for(Slot slot : t.getSlots()){ allocations = slot2Allocations2.get(slot); if(allocations == null){ @@ -575,7 +576,7 @@ slot2Allocations2.put(slot, allocations); } slot2Allocations.put(slot, allocations); - + //for tests add the property URI with http://dbpedia.org/property/ namespace //TODO should be replaced by usage of a separate SOLR index Set<Allocation> tmp = new HashSet<Allocation>(); @@ -590,11 +591,11 @@ allocations.addAll(tmp); }*/ logger.debug("Time needed: " + (System.currentTimeMillis() - startTime) + "ms"); - + Set<WeightedQuery> queries = new HashSet<WeightedQuery>(); Query cleanQuery = t.getQuery(); queries.add(new WeightedQuery(cleanQuery)); - + Set<WeightedQuery> tmp = new TreeSet<WeightedQuery>(); List<Slot> sortedSlots = new ArrayList<Slot>(); Set<Slot> classSlots = new HashSet<Slot>(); @@ -628,125 +629,125 @@ queries.addAll(tmp); tmp.clear(); } - + for(Slot slot : sortedSlots){ if(!slot2Allocations.get(slot).isEmpty()){ for(Allocation a : slot2Allocations.get(slot)){ for(WeightedQuery query : queries){ - Query q = new Query(query.getQuery()); - - boolean drop = false; - if(useDomainRangeRestriction){ - if(slot.getSlotType() == SlotType.PROPERTY || slot.getSlotType() == SlotType.SYMPROPERTY){ - for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ - String objectVar = triple.getValue().getName(); - String subjectVar = triple.getVariable().getName(); -// System.out.println(triple); - for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(objectVar)){ -// System.out.println(typeTriple); - if(true){//reasoner.isObjectProperty(a.getUri())){ - Description range = reasoner.getRange(new ObjectProperty(a.getUri())); -// System.out.println(a); - if(range != null){ - Set<Description> allRanges = new HashSet<Description>(); - SortedSet<Description> superClasses; - if(range instanceof NamedClass){ - superClasses = reasoner.getSuperClasses(range); - allRanges.addAll(superClasses); - } else { - for(Description nc : range.getChildren()){ - superClasses = reasoner.getSuperClasses(nc); - allRanges.addAll(superClasses); - } - } - allRanges.add(range); - allRanges.remove(new NamedClass(Thing.instance.getURI())); - - Set<Description> allTypes = new HashSet<Description>(); - String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); - Description type = new NamedClass(typeURI); - superClasses = reasoner.getSuperClasses(type); - allTypes.addAll(superClasses); - allTypes.add(type); - - if(!org.mindswap.pellet.utils.SetUtils.intersects(allRanges, allTypes)){ - drop = true; - } - } - } else { - drop = true; - } - - } - for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(subjectVar)){ - Description domain = reasoner.getDomain(new ObjectProperty(a.getUri())); -// System.out.println(a); - if(domain != null){ - Set<Description> allDomains = new HashSet<Description>(); + Query q = new Query(query.getQuery()); + + boolean drop = false; + if(useDomainRangeRestriction){ + if(slot.getSlotType() == SlotType.PROPERTY || slot.getSlotType() == SlotType.SYMPROPERTY){ + for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ + String objectVar = triple.getValue().getName(); + String subjectVar = triple.getVariable().getName(); + // System.out.println(triple); + for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(objectVar)){ + // System.out.println(typeTriple); + if(true){//reasoner.isObjectProperty(a.getUri())){ + Description range = reasoner.getRange(new ObjectProperty(a.getUri())); + // System.out.println(a); + if(range != null){ + Set<Description> allRanges = new HashSet<Description>(); SortedSet<Description> superClasses; - if(domain instanceof NamedClass){ - superClasses = reasoner.getSuperClasses(domain); - allDomains.addAll(superClasses); + if(range instanceof NamedClass){ + superClasses = reasoner.getSuperClasses(range); + allRanges.addAll(superClasses); } else { - for(Description nc : domain.getChildren()){ + for(Description nc : range.getChildren()){ superClasses = reasoner.getSuperClasses(nc); - allDomains.addAll(superClasses); + allRanges.addAll(superClasses); } } - allDomains.add(domain); - allDomains.remove(new NamedClass(Thing.instance.getURI())); - + allRanges.add(range); + allRanges.remove(new NamedClass(Thing.instance.getURI())); + Set<Description> allTypes = new HashSet<Description>(); String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); Description type = new NamedClass(typeURI); superClasses = reasoner.getSuperClasses(type); allTypes.addAll(superClasses); allTypes.add(type); - - if(!org.mindswap.pellet.utils.SetUtils.intersects(allDomains, allTypes)){ - drop = true; - } else { - + + if(!org.mindswap.pellet.utils.SetUtils.intersects(allRanges, allTypes)){ + drop = true; + } + } + } else { + drop = true; + } + + } + for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(subjectVar)){ + Description domain = reasoner.getDomain(new ObjectProperty(a.getUri())); + // System.out.println(a); + if(domain != null){ + Set<Description> allDomains = new HashSet<Description>(); + SortedSet<Description> superClasses; + if(domain instanceof NamedClass){ + superClasses = reasoner.getSuperClasses(domain); + allDomains.addAll(superClasses); + } else { + for(Description nc : domain.getChildren()){ + superClasses = reasoner.getSuperClasses(nc); + allDomains.addAll(superClasses); } } + allDomains.add(domain); + allDomains.remove(new NamedClass(Thing.instance.getURI())); + + Set<Description> allTypes = new HashSet<Description>(); + String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); + Description type = new NamedClass(typeURI); + superClasses = reasoner.getSuperClasses(type); + allTypes.addAll(superClasses); + allTypes.add(type); + + if(!org.mindswap.pellet.utils.SetUtils.intersects(allDomains, allTypes)){ + drop = true; + } else { + + } } } } } - - if(!drop){ - if(slot.getSlotType() == SlotType.RESOURCE){//avoid queries where predicate is data property and object resource->add REGEX filter in this case - for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ - SPARQL_Value object = triple.getValue(); - if(object.isVariable() && object.getName().equals(slot.getAnchor())){//only consider triple where SLOT is in object position - SPARQL_Property predicate = triple.getProperty(); - if(!predicate.isVariable()){//only consider triple where predicate is URI - String predicateURI = predicate.getName().replace("<", "").replace(">", ""); - if(isDatatypeProperty(predicateURI)){//if data property - q.addFilter(new SPARQL_Filter(new SPARQL_Pair( - object, "'" + slot.getWords().get(0) + "'", SPARQL_PairType.REGEX))); - } else { - q.replaceVarWithURI(slot.getAnchor(), a.getUri()); - } - } else { - q.replaceVarWithURI(slot.getAnchor(), a.getUri()); - } + } + + if(!drop){ + if(slot.getSlotType() == SlotType.RESOURCE){//avoid queries where predicate is data property and object resource->add REGEX filter in this case + for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ + SPARQL_Value object = triple.getValue(); + if(object.isVariable() && object.getName().equals(slot.getAnchor())){//only consider triple where SLOT is in object position + SPARQL_Property predicate = triple.getProperty(); + if(!predicate.isVariable()){//only consider triple where predicate is URI + String predicateURI = predicate.getName().replace("<", "").replace(">", ""); + if(isDatatypeProperty(predicateURI)){//if data property + q.addFilter(new SPARQL_Filter(new SPARQL_Pair( + object, "'" + slot.getWords().get(0) + "'", SPARQL_PairType.REGEX))); } else { q.replaceVarWithURI(slot.getAnchor(), a.getUri()); } + } else { + q.replaceVarWithURI(slot.getAnchor(), a.getUri()); } - } else { - q.replaceVarWithURI(slot.getAnchor(), a.getUri()); + } else { + q.replaceVarWithURI(slot.getAnchor(), a.getUri()); + } } - WeightedQuery w = new WeightedQuery(q); - double newScore = query.getScore() + a.getScore(); - w.setScore(newScore); - w.addAllocations(query.getAllocations()); - w.addAllocation(a); - tmp.add(w); + } else { + q.replaceVarWithURI(slot.getAnchor(), a.getUri()); } - - + WeightedQuery w = new WeightedQuery(q); + double newScore = query.getScore() + a.getScore(); + w.setScore(newScore); + w.addAllocations(query.getAllocations()); + w.addAllocation(a); + tmp.add(w); + } + + } } //lower queries with FILTER-REGEX @@ -755,7 +756,7 @@ wQ.setScore(wQ.getScore() - 0.01); } } - + queries.clear(); queries.addAll(tmp);//System.out.println(tmp); tmp.clear(); @@ -776,9 +777,9 @@ } } } - + } - + } else { if(slot.getSlotType() == SlotType.SYMPROPERTY){ for(WeightedQuery wQ : queries){ @@ -800,50 +801,50 @@ List<SPARQL_Triple> typeTriples = wQ.getQuery().getRDFTypeTriples(typeVar); for(SPARQL_Triple typeTriple : typeTriples){ String typeURI = typeTriple.getValue().getName().replace("<", "").replace(">", ""); -// List<Entry<String, Integer>> mostFrequentProperties = UnknownPropertyHelper.getMostFrequentProperties(endpoint, cache, typeURI, resourceURI, direction); -// for(Entry<String, Integer> property : mostFrequentProperties){ -// wQ.getQuery().replaceVarWithURI(slot.getAnchor(), property.getKey()); -// wQ.setScore(wQ.getScore() + 0.1); -// } + // List<Entry<String, Integer>> mostFrequentProperties = UnknownPropertyHelper.getMostFrequentProperties(endpoint, cache, typeURI, resourceURI, direction); + // for(Entry<String, Integer> property : mostFrequentProperties){ + // wQ.getQuery().replaceVarWithURI(slot.getAnchor(), property.getKey()); + // wQ.setScore(wQ.getScore() + 0.1); + // } } - + } } } } -// else if(slot.getSlotType() == SlotType.CLASS){ -// String token = slot.getWords().get(0); -// if(slot.getToken().contains("house")){ -// String regexToken = token.replace("houses", "").replace("house", "").trim(); -// try { -// Map<Slot, SortedSet<Allocation>> ret = new SlotProcessor(new Slot(null, SlotType.CLASS, Collections.singletonList("house"))).call(); -// SortedSet<Allocation> alloc = ret.entrySet().iterator().next().getValue(); -// if(alloc != null && !alloc.isEmpty()){ -// String uri = alloc.first().getUri(); -// for(WeightedQuery query : queries){ -// Query q = query.getQuery(); -// for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ -// SPARQL_Term subject = triple.getVariable(); -// SPARQL_Term object = new SPARQL_Term("desc"); -// object.setIsVariable(true); -// object.setIsURI(false); -// q.addCondition(new SPARQL_Triple(subject, new SPARQL_Property("<http://purl.org/goodrelations/v1#description>"), object)); -// q.addFilter(new SPARQL_Filter(new SPARQL_Pair( -// object, "'" + regexToken + "'", SPARQL_PairType.REGEX))); -// } -// q.replaceVarWithURI(slot.getAnchor(), uri); -// -// } -// } -// } catch (Exception e) { -// e.printStackTrace(); -// } -// } -// } - - + // else if(slot.getSlotType() == SlotType.CLASS){ + // String token = slot.getWords().get(0); + // if(slot.getToken().contains("house")){ + // String regexToken = token.replace("houses", "").replace("house", "").trim(); + // try { + // Map<Slot, SortedSet<Allocation>> ret = new SlotProcessor(new Slot(null, SlotType.CLASS, Collections.singletonList("house"))).call(); + // SortedSet<Allocation> alloc = ret.entrySet().iterator().next().getValue(); + // if(alloc != null && !alloc.isEmpty()){ + // String uri = alloc.first().getUri(); + // for(WeightedQuery query : queries){ + // Query q = query.getQuery(); + // for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ + // SPARQL_Term subject = triple.getVariable(); + // SPARQL_Term object = new SPARQL_Term("desc"); + // object.setIsVariable(true); + // object.setIsURI(false); + // q.addCondition(new SPARQL_Triple(subject, new SPARQL_Property("<http://purl.org/goodrelations/v1#description>"), object)); + // q.addFilter(new SPARQL_Filter(new SPARQL_Pair( + // object, "'" + regexToken + "'", SPARQL_PairType.REGEX))); + // } + // q.replaceVarWithURI(slot.getAnchor(), uri); + // + // } + // } + // } catch (Exception e) { + // e.printStackTrace(); + // } + // } + // } + + } - + } for (Iterator<WeightedQuery> iterator = queries.iterator(); iterator.hasNext();) { WeightedQuery wQ = iterator.next(); @@ -852,9 +853,10 @@ iterator.remove(); } } else { + if(t.getSlots().size()==0) throw new AssertionError("no slots for query "+wQ); wQ.setScore(wQ.getScore()/t.getSlots().size()); } - + } allQueries.addAll(queries); List<Query> qList = new ArrayList<Query>(); @@ -866,7 +868,7 @@ logger.debug("...done in "); return allQueries; } - + private double getProminenceValue(String uri, SlotType type){ Integer popularity = null; if(popularityMap != null){ @@ -890,7 +892,7 @@ query = "SELECT COUNT(*) WHERE {?s ?p <%s>}"; } query = String.format(query, uri); - + ResultSet rs = executeSelect(query); QuerySolution qs; String projectionVar; @@ -903,20 +905,20 @@ if(popularity == null){ popularity = Integer.valueOf(0); } - - -// if(cnt == 0){ -// return 0; -// } -// return Math.log(cnt); + + + // if(cnt == 0){ + // return 0; + // } + // return Math.log(cnt); return popularity; } - + public void setPopularityMap(PopularityMap popularityMap) { this.popularityMap = popularityMap; } - - + + private List<String> pruneList(List<String> words){ List<String> prunedList = new ArrayList<String>(); for(String w1 : words){ @@ -934,13 +936,13 @@ } } logger.info("Pruned list: " + prunedList); -// return getLemmatizedWords(words); + // return getLemmatizedWords(words); return prunedList; } - + private List<String> getLemmatizedWords(List<String> words){ logger.info("Pruning word list " + words + "..."); -// mon.start(); + // mon.start(); List<String> pruned = new ArrayList<String>(); for(String word : words){ //currently only stem single words @@ -952,15 +954,15 @@ pruned.add(lemWord); } } - + } -// mon.stop(); -// logger.info("Done in " + mon.getLastValue() + "ms."); + // mon.stop(); + // logger.info("Done in " + mon.getLastValue() + "ms."); logger.info("Pruned list: " + pruned); return pruned; } - - + + private Index getIndexBySlotType(Slot slot){ Index index = null; SlotType type = slot.getSlotType(); @@ -977,12 +979,12 @@ } return index; } - + private void validateAgainstRemoteEndpoint(Collection<WeightedQuery> queries){ SPARQL_QueryType queryType = queries.iterator().next().getQuery().getQt(); validate(queries, queryType); } - + private void validate(Collection<WeightedQuery> queries, SPARQL_QueryType queryType){ logger.debug("Testing candidate SPARQL queries on remote endpoint..."); sparqlMon.start(); @@ -995,7 +997,7 @@ com.hp.hpl.jena.query.Query q = QueryFactory.create(query.getQuery().toString(), Syntax.syntaxARQ); q.setLimit(1); ResultSet rs = executeSelect(q.toString()); - + results = new ArrayList<String>(); QuerySolution qs; String projectionVar; @@ -1007,7 +1009,7 @@ } else if(qs.get(projectionVar).isURIResource()){ results.add(qs.get(projectionVar).asResource().getURI()); } - + } if(!results.isEmpty()){ try{ @@ -1029,7 +1031,7 @@ } catch (Exception e) { e.printStackTrace(); } - + } } else if(queryType == SPARQL_QueryType.ASK){ for(WeightedQuery query : queries){ @@ -1037,31 +1039,39 @@ logger.debug("Testing query:\n" + query); boolean result = executeAskQuery(query.getQuery().toString()); learnedSPARQLQueries.add(query); -// if(stopIfQueryResultNotEmpty && result){ -// return; -// } + // if(stopIfQueryResultNotEmpty && result){ + // return; + // } if(stopIfQueryResultNotEmpty){ return; } logger.debug("Result: " + result); } } - + sparqlMon.stop(); logger.debug("Done in " + sparqlMon.getLastValue() + "ms."); } - - private boolean executeAskQuery(String query){ - currentlyExecutedQuery = query; - QueryEngineHTTP qe = new QueryEngineHTTP(endpoint.getURL().toString(), query); - for(String uri : endpoint.getDefaultGraphURIs()){ - qe.addDefaultGraph(uri); + + private boolean executeAskQuery(String query) + { + if(query==null) throw new NullPointerException("Parameter query == null"); + currentlyExecutedQuery = query; + + boolean ret; + if (model == null) + { + QueryEngineHTTP qe = new QueryEngineHTTP(endpoint.getURL().toString(), query); + qe.setDefaultGraphURIs(endpoint.getDefaultGraphURIs()); + ret = qe.execAsk(); } - boolean ret = qe.execAsk(); + else {ret = QueryExecutionFactory.create(QueryFactory.create(query, Syntax.syntaxARQ), model).execAsk();} return ret; } - - private ResultSet executeSelect(String query) { + + private ResultSet executeSelect(String query) + { + if(query==null) throw new NullPointerException("Parameter query == null"); currentlyExecutedQuery = query; ResultSet rs; if (model == null) { @@ -1076,14 +1086,14 @@ rs = QueryExecutionFactory.create(QueryFactory.create(query, Syntax.syntaxARQ), model) .execSelect(); } - + return rs; } - + public String getCurrentlyExecutedQuery() { return currentlyExecutedQuery; } - + public int getLearnedPosition() { if(learnedPos >= 0){ return learnedPos+1; @@ -1112,7 +1122,7 @@ return null; } } - + public SortedSet<WeightedQuery> getLearnedSPARQLQueries() { return learnedSPARQLQueries; } @@ -1126,13 +1136,13 @@ @Override public void setLearningProblem(LearningProblem learningProblem) { // TODO Auto-generated method stub - + } - + class SlotProcessor implements Callable<Map<Slot, SortedSet<Allocation>>>{ - + private Slot slot; - + public SlotProcessor(Slot slot) { this.slot = slot; } @@ -1143,13 +1153,13 @@ result.put(slot, computeAllocations(slot)); return result; } - + private SortedSet<Allocation> computeAllocations(Slot slot){ logger.debug("Computing allocations for slot: " + slot); SortedSet<Allocation> allocations = new TreeSet<Allocation>(); - + Index index = getIndexBySlotType(slot); - + IndexResultSet rs; for(String word : slot.getWords()){ rs = new IndexResultSet(); @@ -1178,33 +1188,33 @@ rs.add(index.getResourcesWithScores(word, 20)); } } - - + + for(IndexResultItem item : rs.getItems()){ double similarity = Similarity.getSimilarity(word, item.getLabel()); -// //get the labels of the redirects and compute the highest similarity -// if(slot.getSlotType() == SlotType.RESOURCE){ -// Set<String> labels = getRedirectLabels(item.getUri()); -// for(String label : labels){ -// double tmp = Similarity.getSimilarity(word, label); -// if(tmp > similarity){ -// similarity = tmp; -// } -// } -// } + // //get the labels of the redirects and compute the highest similarity + // if(slot.getSlotType() == SlotType.RESOURCE){ + // Set<String> labels = getRedirectLabels(item.getUri()); + // for(String label : labels){ + // double tmp = Similarity.getSimilarity(word, label); + // if(tmp > similarity){ + // similarity = tmp; + // } + // } + // } double prominence = getProminenceValue(item.getUri(), slot.getSlotType()); allocations.add(new Allocation(item.getUri(), prominence, similarity)); } - + } - + normProminenceValues(allocations); - + computeScore(allocations); logger.debug("Found " + allocations.size() + " allocations for slot " + slot); return new TreeSet<Allocation>(allocations); } - + private Index getIndexBySlotType(Slot slot){ Index index = null; SlotType type = slot.getSlotType(); @@ -1221,13 +1231,15 @@ } return index; } - + } - - public String getTaggedInput(){ + + public String getTaggedInput() + { + if(templateGenerator==null) {throw new AssertionError("Learner not initialized. Please call init();");} return templateGenerator.getTaggedInput(); } - + private boolean isDatatypeProperty(String uri){ Boolean isDatatypeProperty = null; if(mappingIndex != null){ @@ -1239,7 +1251,7 @@ } return isDatatypeProperty; } - + /** * @param args * @throws NoTemplateFoundException @@ -1253,20 +1265,20 @@ Index resourcesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_resources"); Index classesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_classes"); Index propertiesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_properties"); - + SPARQLTemplateBasedLearner2 learner = new SPARQLTemplateBasedLearner2(endpoint, resourcesIndex, classesIndex, propertiesIndex); learner.init(); - + String question = "What is the highest mountain?"; - + learner.setQuestion(question); learner.learnSPARQLQueries(); System.out.println("Learned query:\n" + learner.getBestSPARQLQuery()); System.out.println("Lexical answer type is: " + learner.getTemplates().iterator().next().getLexicalAnswerType()); System.out.println(learner.getLearnedPosition()); - + } - + } Modified: trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java 2012-08-23 08:40:43 UTC (rev 3830) +++ trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java 2012-08-23 12:49:55 UTC (rev 3831) @@ -127,7 +127,7 @@ return irs; } - private ResultSet executeSelect(String query){System.out.println(query); + private ResultSet executeSelect(String query){ ResultSet rs; if(model == null){ if(cache == null){ Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-08-23 08:40:43 UTC (rev 3830) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-08-23 12:49:55 UTC (rev 3831) @@ -4,10 +4,8 @@ import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; -import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; -import java.io.InputStream; import java.io.InputStreamReader; import java.io.ObjectInputStream; import java.io.ObjectOutputStream; @@ -15,7 +13,6 @@ import java.io.Serializable; import java.io.UnsupportedEncodingException; import java.net.MalformedURLException; -import java.net.URL; import java.net.URLDecoder; import java.text.DateFormat; import java.util.Collection; @@ -61,17 +58,13 @@ import org.dllearner.common.index.Index; import org.dllearner.common.index.MappingBasedIndex; import org.dllearner.common.index.SOLRIndex; -import org.dllearner.common.index.SPARQLClassesIndex; -import org.dllearner.common.index.SPARQLIndex; -import org.dllearner.common.index.VirtuosoClassesIndex; -import org.dllearner.common.index.VirtuosoResourcesIndex; import org.dllearner.core.ComponentInitException; import org.dllearner.kb.sparql.ExtractionDBCache; import org.dllearner.kb.sparql.SparqlEndpoint; import org.dllearner.kb.sparql.SparqlQuery; import org.ini4j.Options; -import org.junit.*; -import org.w3c.dom.DOMException; +import org.junit.Before; +import org.junit.Test; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.NodeList; @@ -111,16 +104,16 @@ {test("QALD 2 Benchmark ideally tagged", new File(getClass().getClassLoader().getResource("tbsl/evaluation/qald2-dbpedia-train-tagged(ideal).xml").getFile()), SparqlEndpoint.getEndpointDBpedia(),dbpediaLiveCache);} - @Test public void testOxford() throws IOException + @Test public void generateXMLOxford() throws IOException { Model m = loadOxfordModel(); List<String> questions = new LinkedList<String>(); BufferedReader in = new BufferedReader((new InputStreamReader(getClass().getClassLoader().getResourceAsStream("tbsl/oxford_eval_queries.txt")))); int j=0; - for(String line=in.readLine();line!=null;) + for(String line;(line=in.readLine())!=null;) { j++; - if(j>5) break; + if(j>1) break; if(!line.isEmpty()) {questions.add(line.replace("question: ", ""));} } in.close(); @@ -133,7 +126,8 @@ SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_dataproperty_mappings.txt").getPath(), SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_objectproperty_mappings.txt").getPath() ); - generateTestDataMultiThreaded(id2Question, null,m,mappingIndex,OXFORD_PRETAGGED); + QueryTestData testData = generateTestDataMultiThreaded(id2Question, null,m,mappingIndex,OXFORD_PRETAGGED); + testData.writeQaldXml(new File("log/test.xml")); } private Model loadOxfordModel() @@ -250,7 +244,7 @@ generateUpdatedXML(referenceXML,updatedReferenceXML,endpoint,cache); } - QueryTestData referenceTestData = readQueries(updatedReferenceXML); + QueryTestData referenceTestData = QueryTestData.readQaldXml(updatedReferenceXML); logger.info(title+" subset loaded with "+referenceTestData.id2Question.size()+" questions."); long startLearning = System.currentTimeMillis(); @@ -463,7 +457,7 @@ } } - private static class LearnStatus implements Serializable + public static class LearnStatus implements Serializable { public enum Type {OK, TIMEOUT, NO_TEMPLATE_FOUND,QUERY_RESULT_EMPTY,NO_QUERY_LEARNED,EXCEPTION} @@ -693,14 +687,13 @@ // private SPARQLTemplateBasedLearner2 oxfordLearner; // private SPARQLTemplateBasedLearner2 dbpediaLiveLearner; - private final ExtractionDBCache oxfordCache = new ExtractionDBCache("cache"); +// private final ExtractionDBCache oxfordCache = new ExtractionDBCache("cache"); private final static ExtractionDBCache dbpediaLiveCache = new ExtractionDBCache("cache"); private final Knowledgebase dbpediaLiveKnowledgebase = createDBpediaLiveKnowledgebase(dbpediaLiveCache); static final SparqlEndpoint dbpediaLiveEndpoint = SparqlEndpoint.getEndpointDBpediaLiveAKSW(); //static SparqlEndpoint oxfordEndpoint; - private static final int MAXIMUM_QUESTIONS = Integer.MAX_VALUE; // private ResultSet executeDBpediaLiveSelect(String query){return SparqlQuery.convertJSONtoResultSet(dbpediaLiveCache.executeSelectQuery(dbpediaLiveEndpoint, query));} @@ -732,7 +725,7 @@ Logger.getRootLogger().setLevel(Level.WARN); Logger.getLogger(Templator.class).setLevel(Level.WARN); Logger.getLogger(Parser.class).setLevel(Level.WARN); - Logger.getLogger(SPARQLTemplateBasedLearner2.class).setLevel(Level.WARN); + Logger.getLogger(SPARQLTemplateBasedLearner2.class).setLevel(Level.INFO); // Logger.getLogger(SPARQLTemplateBasedLearner2.class).setLevel(Level.INFO); logger.setLevel(Level.INFO); // TODO: remove when finishing implementation of this class logger.addAppender(new FileAppender(new SimpleLayout(), "log/"+this.getClass().getSimpleName()+".log", false)); @@ -741,131 +734,7 @@ // oxfordLearner = new SPARQLTemplateBasedLearner2(createOxfordKnowledgebase(oxfordCache)); } - private static class QueryTestData implements Serializable - { - public SortedMap<Integer, String> id2Question = new ConcurrentSkipListMap<Integer, String>(); - public SortedMap<Integer, String> id2Query = new ConcurrentSkipListMap<Integer, String>(); - public SortedMap<Integer, Set<String>> id2Answers = new ConcurrentSkipListMap<Integer, Set<String>>(); - public SortedMap<Integer, LearnStatus> id2LearnStatus = new ConcurrentSkipListMap<Integer, LearnStatus>(); - - private static final String persistancePath = "cache/"+SPARQLTemplateBasedLearner3Test.class.getSimpleName()+'/'+QueryTestData.class.getSimpleName(); - - public synchronized void write() - { - try - { - ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(new File(persistancePath))); - oos.writeObject(this); - oos.close(); - } catch(IOException e) {throw new RuntimeException(e);} - } - - public static QueryTestData read() throws FileNotFoundException, IOException - { - try - { - ObjectInputStream ois = new ObjectInputStream(new FileInputStream(new File(persistancePath))); - QueryTestData testData = (QueryTestData) ois.readObject(); - ois.close(); - return testData; - } - catch (ClassNotFoundException e){throw new RuntimeException(e);} - } - - public QueryTestData generateAnswers(SparqlEndpoint endpoint, ExtractionDBCache cache) - { - if(!id2Answers.isEmpty()) {throw new AssertionError("Answers already existing.");} - for(int i:id2Query.keySet()) - { - Set<String> uris = getUris(endpoint, id2Query.get(i),cache); - id2Answers.put(i, uris); // empty answer set better transfers intended meaning and doesn't cause NPEs in html generation :-) - if(!uris.isEmpty()) {/*id2Answers.put(i, uris);*/} - else {id2LearnStatus.put(i, LearnStatus.QUERY_RESULT_EMPTY);} - } - return this; - } - - - } - - private QueryTestData readQueries(final File file) - { - QueryTestData testData = new QueryTestData(); - try { - DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); - DocumentBuilder db = dbf.newDocumentBuilder(); - Document doc = db.parse(file); - doc.getDocumentElement().normalize(); - NodeList questionNodes = doc.getElementsByTagName("question"); - int id; - - for(int i = 0; i < questionNodes.getLength(); i++) - { - if(i>=MAXIMUM_QUESTIONS) break; // TODO: remove - String question; - String query; - Set<String> answers = new HashSet<String>(); - Element questionNode = (Element) questionNodes.item(i); - //read question ID - id = Integer.valueOf(questionNode.getAttribute("id")); - //Read question - question = ((Element)questionNode.getElementsByTagName("string").item(0)).getChildNodes().item(0).getNodeValue().trim(); - //Read SPARQL query - query = ((Element)questionNode.getElementsByTagName("query").item(0)).getChildNodes().item(0).getNodeValue().trim(); - // //Read answers - // answers = new HashSet<String>(); - // NodeList aswersNodes = questionNode.getElementsByTagName("answer"); - // for(int j = 0; j < aswersNodes.getLength(); j++){ - // Element answerNode = (Element) aswersNodes.item(j); - // answers.add(((Element)answerNode.getElementsByTagName("uri").item(0)).getChildNodes().item(0).getNodeValue().trim()); - // } - - if(!query.equals("OUT OF SCOPE")) // marker in qald benchmark file, will create holes interval of ids (e.g. 1,2,5,7) - { - testData.id2Question.put(id, question); - testData.id2Query.put(id, query); - Element answersElement = (Element) questionNode.getElementsByTagName("answers").item(0); - if(answersElement!=null) - { - NodeList answerElements = answersElement.getElementsByTagName("answer"); - for(int j=0; j<answerElements.getLength();j++) - { - String answer = ((Element)answerElements.item(j)).getTextContent(); - answers.add(answer); - } - testData.id2Answers.put(id, answers); - } - } - // question2Answers.put(question, answers); - - } - } catch (DOMException e) { - e.printStackTrace(); - } catch (ParserConfigurationException e) { - e.printStackTrace(); - } catch (SAXException e) { - e.printStackTrace(); - } catch (IOException e) { - e.printStackTrace(); - } - // StringBuilder sb = new StringBuilder(); - // for(Entry<Integer, String> e : id2Question.entrySet()){ - // sb.append(e.getKey()+ ": " + extractSentence(e.getValue()) + "\n"); - // } - // try { - // BufferedWriter out = new BufferedWriter(new FileWriter("questions.txt")); - // out.write(sb.toString()); - // out.close(); - // } - // catch (IOException e) - // { - // System.out.println("Exception "); - // - // } - return testData; - } - - protected static Set<String> getUris(final SparqlEndpoint endpoint, final String query, ExtractionDBCache cache) + public static Set<String> getUris(final SparqlEndpoint endpoint, final String query, ExtractionDBCache cache) { if(query==null) {throw new AssertionError("query is null");} if(endpoint==null) {throw new AssertionError("endpoint is null");} @@ -969,8 +838,7 @@ {static public final PartOfSpeechTagger posTagger = new SynchronizedStanfordPartOfSpeechTagger();} static private final WordNet wordnet = new WordNet(); - static private final Options options = new Options(); - private final boolean pretagged; + static private final Options options = new Options(); private final SPARQLTemplateBasedLearner2 learner; public LearnQueryCallable(String question, int id, QueryTestData testData, Knowledgebase knowledgeBase,boolean pretagged) @@ -978,8 +846,8 @@ this.question=question; this.id=id; this.testData=testData; - this.pretagged=pretagged; learner = new SPARQLTemplateBasedLearner2(knowledgeBase,pretagged?null:POSTaggerHolder.posTagger,wordnet,options); + try {learner.init();} catch (ComponentInitException e) {throw new RuntimeException(e);} } public LearnQueryCallable(String question, int id, QueryTestData testData, Model model,MappingBasedIndex index,boolean pretagged) @@ -987,20 +855,15 @@ this.question=question; this.id=id; this.testData=testData; - this.pretagged=pretagged; MappingBasedIndex mappingIndex= new MappingBasedIndex( SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_class_mappings.txt").getPath(), SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_resource_mappings.txt").getPath(), SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_dataproperty_mappings.txt").getPath(), SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_objectproperty_mappings.txt").getPath() ); - + learner = new SPARQLTemplateBasedLearner2(model,mappingIndex,pretagged?null:POSTaggerHolder.posTagger); - try { - learner.init(); - } catch (ComponentInitException e) { - e.printStackTrace(); - } + try {learner.init();} catch (ComponentInitException e) {throw new RuntimeException(e);} } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |